[{"title":"GPT Image | v2 | Text to Image","slug":"gpt-image-v2-text-to-image","version":"0.0.1","output_type":"array","request_schema":{"properties":{},"type":"object"}},{"title":"GPT Image | v2 | Edit","slug":"gpt-image-v2-edit","version":"0.0.1","output_type":"array","request_schema":{"properties":{},"type":"object"}},{"title":"Gemini 2.5 Flash","slug":"gemini-2-5-flash","version":"0.0.1","output_type":"text","request_schema":{"properties":{},"type":"object"}},{"title":"ACE-Step 1.5 | Text to Music","slug":"ace-step-1-5-text-to-music","version":"0.0.1","output_type":"audio","request_schema":{"properties":{"bpm":{"default":"","description":"Beats per minute. Leave empty for auto-detect.","maximum":0,"minimum":0,"type":"integer"},"duration":{"default":30,"description":"Length of generated audio in seconds. Default 30. Billed per output second.","maximum":0,"minimum":0,"type":"integer"},"guidance_scale":{"default":7,"description":"Classifier-free guidance strength (base models only, ignored on turbo). Default 7.","maximum":0,"minimum":0,"type":"number"},"infer_method":{"default":"ode","description":"Diffusion solver. ode (default) is deterministic; sde is stochastic.","enum":["[{\"label\":\"ODE (Euler","deterministic)\"","\"value\":\"ode\"}","{\"label\":\"SDE (stochastic","varied)\"","\"value\":\"sde\"}]"],"type":"string"},"key_scale":{"default":"","description":"Musical key and mode, e.g. \"C Major\", \"Am\", \"F# minor\". Empty for auto-detect.","type":"string"},"lm_cfg_scale":{"default":1,"description":"LM classifier-free guidance scale. Default 1. WARNING: \u003e1.0 triggers ~10x slower step-by-step CFG.","maximum":0,"minimum":0,"type":"number"},"lm_negative_prompt":{"default":"NO USER INPUT","description":"Negative prompt for LM stage.","type":"string"},"lm_temperature":{"default":0.85,"description":"LM sampling temperature when thinking is enabled. Default 0.85.","maximum":0,"minimum":0,"type":"number"},"lyrics":{"default":"","description":"Lyrics text. Use [verse], [chorus], [bridge] for structure, or [inst]/[instrumental] for a vocals-free track. Max 4096 chars. Multilingual.","type":"string"},"num_inference_steps":{"default":8,"description":"Number of denoising steps. Default 8 (turbo). Base: 32-64.","maximum":0,"minimum":0,"type":"integer"},"num_outputs":{"default":1,"description":"How many distinct audio samples to generate per request. Default 1. Each additional output is charged at the full rate.","maximum":0,"minimum":0,"type":"integer"},"prompt":{"default":"","description":"Music description or caption (style, instruments, mood). Max 512 chars. Drives the musical identity.","type":"string"},"seed":{"default":"","description":"Random seed for reproducible results. Leave empty for random.","maximum":0,"minimum":0,"type":"integer"},"shift":{"default":3,"description":"Timestep shift factor. Default 3.0 (recommended for turbo).","maximum":0,"minimum":0,"type":"number"},"thinking":{"default":true,"description":"Enable the 5Hz LM Chain-of-Thought stage for higher quality. Default true. WARNING: 2x cost per output second.","type":"boolean"},"time_signature":{"default":"","description":"Rhythmic grouping. Empty = auto-detect.","enum":["[{\"label\":\"Auto\"","\"value\":\"\"}","{\"label\":\"2/4\"","\"value\":\"2\"}","{\"label\":\"3/4\"","\"value\":\"3\"}","{\"label\":\"4/4\"","\"value\":\"4\"}","{\"label\":\"6/8\"","\"value\":\"6\"}]"],"type":"string"},"use_constrained_decoding":{"default":true,"description":"Enable constrained decoding for reliable metadata parsing. Default true.","type":"boolean"},"vocal_language":{"default":"unknown","description":"Language code for vocals (en, zh, ja, ko, es, fr, de, etc.). Use \"unknown\" to auto-detect from lyrics.","type":"string"}},"type":"object"}},{"title":"Alibaba | Qwen3 ASR Flash Filetrans | Speech to Text","slug":"alibaba-qwen3-asr-flash-filetrans-speech-to-text","version":"0.0.1","output_type":"object","request_schema":{"properties":{"audio_url":{"default":"","description":"URL of the audio file to transcribe. Supports aac, amr, flac, m4a, mp3, ogg, opus, wav, webm, wma, wmv and video containers. Max 2GB, up to 12 hours.","type":"url"},"channel_id":{"default":"","description":"Audio track indices for multi-channel audio (0-indexed, JSON array). Example: [0, 1] for first two tracks. Each track billed separately. Default: [0].","type":"text"},"context_text":{"default":"","description":"Domain-specific vocabulary or context to improve recognition accuracy. Up to 10,000 tokens. Useful for medical, legal, or brand-specific terminology.","type":"text"},"enable_itn":{"default":false,"description":"Convert spoken number forms to written digits (e.g., one hundred to 100). Chinese and English only.","type":"boolean"},"enable_words":{"default":false,"description":"Return word-level timestamps with per-word timing. Also improves sentence segmentation. Supported for Chinese, English, Japanese, Korean, German, French, Spanish, Italian, Portuguese, Russian.","type":"boolean"},"language":{"default":"","description":"Language of the audio. Leave empty for automatic multilingual detection. zh: Chinese, yue: Cantonese, en: English, ja: Japanese, de: German, ko: Korean, ru: Russian, fr: French, pt: Portuguese, ar: Arabic, it: Italian, es: Spanish, hi: Hindi, id: Indonesian, th: Thai, tr: Turkish, uk: Ukrainian, vi: Vietnamese, cs: Czech, da: Danish, fil: Filipino, fi: Finnish, is: Icelandic, ms: Malay, no: Norwegian, pl: Polish, sv: Swedish.","type":"select"}},"required":["audio_url"],"type":"object"}},{"title":"Bytedance | Seedance 2.0 | Reference to Video","slug":"bytedance-seedance-2-0-reference-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"auto","description":"The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to let the model decide.","enum":["auto","21:9","16:9","4:3","1:1","3:4","9:16"],"type":"string"},"audio_urls":{"default":"","description":"Reference audio to guide video generation. Refer to them in the prompt as @Audio1, @Audio2, etc. Supported formats: MP3, WAV. Up to 3 files, combined duration must not exceed 15 seconds. Max 15 MB per file.If audio is provided, at least one reference image or video is required.","type":"string"},"duration":{"default":"auto","description":"Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model decide based on the prompt.","enum":["auto","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_user_id":{"default":"","description":"The unique user ID of the end user.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate synchronized audio for the video, including sound effects, ambient sounds, and lip-synced speech. The cost of video generation is the same regardless of whether audio is generated or not.","type":"boolean"},"image_urls":{"default":"","description":"Reference images to guide video generation. Refer to them in the prompt as @Image1, @Image2, etc. Supported formats: JPEG, PNG, WebP. Max 30 MB per image. Up to 9 images. Total files across all modalities must not exceed 12.","items":{"type":"image"},"type":"array"},"prompt":{"default":"","description":"The text prompt used to generate the video.","type":"string"},"resolution":{"default":"720p","description":"Video resolution - 480p for faster generation, 720p for balance.","enum":["480p","720p"],"type":"string"},"seed":{"default":"","description":"Random seed for reproducibility. Note that results may still vary slightly even with the same seed.","type":"string"},"video_urls":{"default":"","description":"Reference videos to guide video generation. Refer to them in the prompt as @Video1, @Video2, etc. Supported formats: MP4, MOV. Up to 3 videos, combined duration must be between 2 and 15 seconds, total size under 50 MB. Each video must be between ~480p (640x640) and ~720p (834x1112) in resolution.","type":"string"}},"required":["prompt"],"type":"object"}},{"title":"Bytedance | Seedance 2.0 | Image to Video","slug":"bytedance-seedance-2-0-image-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"auto","description":"The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to infer from the input image.","enum":["auto","21:9","16:9","4:3","1:1","3:4","9:16"],"type":"string"},"duration":{"default":"auto","description":"Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model decide based on the prompt.","enum":["auto","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_image_url":{"default":"","description":"The URL of the image to use as the last frame of the video. When provided, the generated video will transition from the starting image to this ending image. Supported formats: JPEG, PNG, WebP. Max 30 MB.","type":"string"},"end_user_id":{"default":"","description":"The unique user ID of the end user.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate synchronized audio for the video, including sound effects, ambient sounds, and lip-synced speech. The cost of video generation is the same regardless of whether audio is generated or not.","type":"boolean"},"image_url":{"default":"","description":"The URL of the starting frame image to animate. Supported formats: JPEG, PNG, WebP. Max 30 MB.","type":"string"},"prompt":{"default":"","description":"The text prompt describing the desired motion and action for the video.","type":"string"},"resolution":{"default":"720p","description":"Video resolution - 480p for faster generation, 720p for balance.","enum":["480p","720p"],"type":"string"},"seed":{"default":"","description":"Random seed for reproducibility. Note that results may still vary slightly even with the same seed.","type":"string"}},"required":["prompt","image_url"],"type":"object"}},{"title":"Bytedance | Seedance 2.0 | Text to Video","slug":"bytedance-seedance-2-0-text-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"auto","description":"The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to let the model decide.","enum":["auto","21:9","16:9","4:3","1:1","3:4","9:16"],"type":"string"},"duration":{"default":"auto","description":"Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model decide based on the prompt.","enum":["auto","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_user_id":{"default":"","description":"The unique user ID of the end user.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate synchronized audio for the video, including sound effects, ambient sounds, and lip-synced speech. The cost of video generation is the same regardless of whether audio is generated or not.","type":"boolean"},"prompt":{"default":"","description":"The text prompt used to generate the video","type":"string"},"resolution":{"default":"720p","description":"Video resolution - 480p for faster generation, 720p for balance.","enum":["480p","720p"],"type":"string"},"seed":{"default":"","description":"Random seed for reproducibility. Note that results may still vary slightly even with the same seed.","type":"string"}},"required":["prompt"],"type":"object"}},{"title":"Bytedance | Seedance 2.0 | Reference to Video | Fast","slug":"bytedance-seedance-2-0-reference-to-video-fast","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"false","description":"The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to let the model decide.","enum":["auto","21:9","16:9","4:3","1:1","3:4","9:16"],"type":"string"},"audio_urls":{"default":"","description":"Reference audio to guide video generation. Refer to them in the prompt as @Audio1, @Audio2, etc. Supported formats: MP3, WAV. Up to 3 files, combined duration must not exceed 15 seconds. Max 15 MB per file.If audio is provided, at least one reference image or video is required.","type":"string"},"duration":{"default":"false","description":"Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model decide based on the prompt.","enum":["auto","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_user_id":{"default":"","description":"The unique user ID of the end user.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate synchronized audio for the video, including sound effects, ambient sounds, and lip-synced speech. The cost of video generation is the same regardless of whether audio is generated or not.","type":"boolean"},"image_urls":{"default":"","description":"Reference images to guide video generation. Refer to them in the prompt as @Image1, @Image2, etc. Supported formats: JPEG, PNG, WebP. Max 30 MB per image. Up to 9 images. Total files across all modalities must not exceed 12.","type":"string"},"prompt":{"default":"","description":"The text prompt used to generate the video.","type":"string"},"resolution":{"default":"false","description":"Video resolution - 480p for faster generation, 720p for balance.","enum":["480p","720p"],"type":"string"},"seed":{"default":"","description":"Random seed for reproducibility. Note that results may still vary slightly even with the same seed.","type":"string"},"video_urls":{"default":"","description":"Reference videos to guide video generation. Refer to them in the prompt as @Video1, @Video2, etc. Supported formats: MP4, MOV. Up to 3 videos, combined duration must be between 2 and 15 seconds, total size under 50 MB. Each video must be between ~480p (640x640) and ~720p (834x1112) in resolution.","type":"string"}},"required":["prompt"],"type":"object"}},{"title":"Bytedance | Seedance 2.0 | Image to Video | Fast","slug":"bytedance-seedance-2-0-image-to-video-fast","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"auto","description":"The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to infer from the input image.","enum":["auto","21:9","16:9","4:3","1:1","3:4","9:16"],"type":"string"},"duration":{"default":"auto","description":"Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model decide based on the prompt.","enum":["auto","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_image_url":{"default":"","description":"The URL of the image to use as the last frame of the video. When provided, the generated video will transition from the starting image to this ending image. Supported formats: JPEG, PNG, WebP. Max 30 MB.","type":"string"},"end_user_id":{"default":"","description":"The unique user ID of the end user.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate synchronized audio for the video, including sound effects, ambient sounds, and lip-synced speech. The cost of video generation is the same regardless of whether audio is generated or not.","type":"boolean"},"image_url":{"default":"","description":"The URL of the starting frame image to animate. Supported formats: JPEG, PNG, WebP. Max 30 MB.","type":"string"},"prompt":{"default":"","description":"The text prompt describing the desired motion and action for the video.","type":"string"},"resolution":{"default":"720p","description":"Video resolution - 480p for faster generation, 720p for balance.","enum":["480p","720p"],"type":"string"},"seed":{"default":"","description":"Random seed for reproducibility. Note that results may still vary slightly even with the same seed.","type":"string"}},"required":["prompt","image_url"],"type":"object"}},{"title":"Bytedance | Seedance 2.0 | Text to Video | Fast","slug":"bytedance-seedance-2-0-text-to-video-fast","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"auto","description":"The aspect ratio of the generated video. Use 16:9 for landscape, 9:16 for portrait/vertical, 1:1 for square, 21:9 for ultrawide cinematic, or auto to let the model decide.","enum":["auto","21:9","16:9","4:3","1:1","3:4","9:16"],"type":"string"},"duration":{"default":"auto","description":"Duration of the video in seconds. Supports 4 to 15 seconds, or auto to let the model decide based on the prompt.","enum":["auto","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_user_id":{"default":"","description":"The unique user ID of the end user.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate synchronized audio for the video, including sound effects, ambient sounds, and lip-synced speech. The cost of video generation is the same regardless of whether audio is generated or not.","type":"boolean"},"prompt":{"default":"","description":"The text prompt used to generate the video","type":"string"},"resolution":{"default":"720p","description":"Video resolution - 480p for faster generation, 720p for balance.","enum":["480p","720p"],"type":"string"},"seed":{"default":"","description":"Random seed for reproducibility. Note that results may still vary slightly even with the same seed.","type":"string"}},"required":["prompt"],"type":"object"}},{"title":"Sync 3 | Lipsync","slug":"sync-3-lipsync","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"Alibaba | Wan 2.7 | Video Edit","slug":"alibaba-wan-2-7-video-edit","version":"0.0.1","output_type":"video","request_schema":{"properties":{"audio_setting":{"default":"auto","description":"auto: model decides audio handling (default). origin: preserve original audio.","enum":["[\"auto\"","\"origin\"]"],"type":"string"},"duration":{"default":"","description":"Output video duration in seconds. Range: 2-10. If omitted, output keeps the input video duration.","maximum":0,"minimum":0,"type":"integer"},"negative_prompt":{"default":"","description":"Describe what to avoid in the edited video. Max 500 characters.","type":"string"},"prompt":{"default":"false","description":"Editing instructions, style transfer description, or reference-based edit prompt. Max 5,000 characters.","type":"string"},"prompt_extend":{"default":true,"description":"Intelligent prompt rewriting for better quality. Default: true.","type":"boolean"},"ratio":{"default":"","description":"Output video aspect ratio override. If omitted, output matches input video aspect ratio.","enum":["[\"16:9\"","\"9:16\"","\"1:1\"","\"4:3\"","\"3:4\"]"],"type":"string"},"reference_image":{"default":"false","description":"Reference image for reference-based editing (optional). JPEG/PNG/WEBP, max 20MB.","type":"string"},"resolution":{"default":"1080P","description":"Output resolution. 720P: lower cost. 1080P: higher quality (default).","enum":["[\"720P\"","\"1080P\"]"],"type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Random if omitted.","maximum":0,"minimum":0,"type":"integer"},"video_url":{"default":"false","description":"Input video to edit. MP4/MOV, 2-10s, max 100MB.","type":"string"}},"required":["video_url","prompt"],"type":"object"}},{"title":"Alibaba | Wan 2.7 | Reference to Video","slug":"alibaba-wan-2-7-reference-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"duration":{"default":5,"description":"Video duration in seconds. Range: 2-10. Default: 5.","maximum":0,"minimum":0,"type":"integer"},"first_frame":{"default":"","description":"URL of image for first frame composition control (optional). When provided, ratio is ignored.","type":"string"},"negative_prompt":{"default":"false","description":"Describe what to avoid. Max 500 characters.","type":"string"},"prompt":{"default":"false","description":"Text description of the video with reference character/object. Max 5,000 characters.","type":"string"},"prompt_extend":{"default":true,"description":"Intelligent prompt rewriting for better quality. Default: true.","type":"boolean"},"ratio":{"default":"16:9","description":"Output aspect ratio. 16:9: landscape (default). 9:16: portrait. 1:1: square.","enum":["[\"16:9\"","\"9:16\"","\"1:1\"","\"4:3\"","\"3:4\"]"],"type":"string"},"reference_image":{"default":"false","description":"Reference image for character/object appearance consistency. JPEG/PNG/WEBP, max 20MB.","type":"string"},"reference_video":{"default":"false","description":"Reference video for appearance + motion (optional). MP4/MOV, max 100MB.","type":"string"},"reference_voice":{"default":"","description":"URL of audio to set voice timbre for the reference character (optional). WAV/MP3, 1-10s, max 15 MB.","type":"string"},"resolution":{"default":"1080P","description":"Output resolution. 720P: lower cost. 1080P: higher quality (default).","enum":["[\"720P\"","\"1080P\"]"],"type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Random if omitted.","maximum":0,"minimum":0,"type":"integer"},"shot_type":{"default":"single","description":"single: one continuous shot (default). multi: multiple shots with transitions.","enum":["[\"single\"","\"multi\"]"],"type":"string"}},"required":["reference_image","prompt"],"type":"object"}},{"title":"Alibaba | Wan 2.7 | Text to Video","slug":"alibaba-wan-2-7-text-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"audio_url":{"default":"false","description":"Audio file for audio-visual sync (optional). WAV/MP3, 3-30s, max 15MB.","type":"string"},"duration":{"default":5,"description":"Video duration in seconds. Range: 2-15. Default: 5.","maximum":0,"minimum":0,"type":"integer"},"negative_prompt":{"default":"false","description":"Describe what to avoid. Max 500 characters.","type":"string"},"prompt":{"default":"false","description":"Text description of the video to generate. Max 5,000 characters.","type":"string"},"prompt_extend":{"default":true,"description":"Intelligent prompt rewriting for better quality. Default: true.","type":"boolean"},"ratio":{"default":"16:9","description":"Output aspect ratio. 16:9: landscape (default). 9:16: portrait. 1:1: square.","enum":["[\"16:9\"","\"9:16\"","\"1:1\"","\"4:3\"","\"3:4\"]"],"type":"string"},"resolution":{"default":"1080P","description":"Output resolution. 720P: lower cost. 1080P: higher quality (default).","enum":["[\"720P\"","\"1080P\"]"],"type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Random if omitted.","maximum":0,"minimum":0,"type":"integer"}},"required":["prompt"],"type":"object"}},{"title":"Alibaba | Wan 2.7 | Image to Video","slug":"alibaba-wan-2-7-image-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"driving_audio":{"default":"","description":"URL of audio for audio-visual synchronization (optional). WAV/MP3, 2-30s, max 15 MB.","type":"string"},"duration":{"default":5,"description":"Video duration in seconds. Range: 2-15. Each second is billed separately. Default: 5.","maximum":0,"minimum":0,"type":"integer"},"first_clip":{"default":"","description":"URL of a video to continue from (optional). Enables video continuation mode. MP4/MOV, 2-10s, max 100 MB. When provided, first_frame is ignored.","type":"string"},"first_frame":{"default":"false","description":"URL of the image to use as the first frame. Supported formats: JPEG, JPG, PNG, BMP, WEBP. Max 20 MB.","type":"string"},"last_frame":{"default":"false","description":"URL of the image to use as the last frame (optional). Enables first-and-last-frame generation mode.","type":"string"},"negative_prompt":{"default":"","description":"Describe what to avoid in the generated video. Max 500 characters.","type":"string"},"prompt":{"default":"false","description":"Text description to guide video generation. Supports Chinese and English. Max 5,000 characters. Optional but recommended.","type":"string"},"prompt_extend":{"default":true,"description":"Intelligent prompt rewriting for better quality. Default: true.","type":"boolean"},"resolution":{"default":"1080P","description":"Output video resolution. 720P: lower cost. 1080P: higher quality (default).","enum":["[\"720P\"","\"1080P\"]"],"type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Same seed produces similar results. Random if omitted.","maximum":0,"minimum":0,"type":"integer"}},"required":["first_frame"],"type":"object"}},{"title":"Veo 3.1 | Lite | First Last Frame to Video","slug":"veo-3-1-lite-first-last-frame-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"Veo 3.1 | Lite | Image to Video","slug":"veo-3-1-lite-image-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"Veo 3.1 | Lite | Text to Video","slug":"veo-3-1-lite-text-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"Alibaba | Wan 2.7 | Text to Image","slug":"alibaba-wan-2-7-text-to-image","version":"0.0.1","output_type":"image","request_schema":{"properties":{"n":{"default":1,"description":"Number of images to generate. Range: 1-4. Each image is billed separately.","maximum":0,"minimum":0,"type":"integer"},"prompt":{"default":"false","description":"Text description of the image to generate. Supports Chinese and English. Max 5,000 characters.","type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Same seed produces similar results. Random if omitted.","maximum":0,"minimum":0,"type":"integer"},"size":{"default":"2K","description":"Output image resolution. 1K: 1024x1024. 2K: 2048x2048 (default).","enum":["[\"1K\"","\"2K\"]"],"type":"string"},"thinking_mode":{"default":true,"description":"Enhanced inference for better image quality at the cost of longer generation time. Default: true.","type":"boolean"}},"required":["prompt"],"type":"object"}},{"title":"Alibaba | Wan 2.7 | Image Edit","slug":"alibaba-wan-2-7-image-edit","version":"0.0.1","output_type":"array","request_schema":{"properties":{"img_url":{"default":"false","description":"URL of the primary input image to edit. Supported formats: JPEG, JPG, PNG, BMP, WEBP. Max 20 MB.","type":"string"},"img_url_2":{"default":"false","description":"URL of a second reference image (optional).","type":"string"},"img_url_3":{"default":"false","description":"URL of a third reference image (optional).","type":"string"},"img_url_4":{"default":"false","description":"URL of a fourth reference image (optional).","type":"string"},"n":{"default":1,"description":"Number of images to generate. Range: 1-4. Each image is billed separately.","maximum":0,"minimum":0,"type":"integer"},"prompt":{"default":"false","description":"Text instruction describing the desired editing operation. Supports Chinese and English. Max 5,000 characters.","type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Same seed produces similar results. Random if omitted.","maximum":0,"minimum":0,"type":"integer"},"size":{"default":"2K","description":"Output image resolution. 1K: ~1024px. 2K: ~2048px (default). Aspect ratio matches last input image.","enum":["[\"1K\"","\"2K\"]"],"type":"string"}},"required":["prompt","img_url"],"type":"object"}},{"title":"Alibaba | Wan 2.7 | Pro | Text to Image","slug":"alibaba-wan-2-7-pro-text-to-image","version":"0.0.1","output_type":"image","request_schema":{"properties":{"n":{"default":1,"description":"Number of images to generate. Range: 1-4. Each image is billed separately.","maximum":0,"minimum":0,"type":"integer"},"prompt":{"default":"false","description":"Text description of the image to generate. Supports Chinese and English. Max 5,000 characters.","type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Same seed produces similar results. Random if omitted.","maximum":0,"minimum":0,"type":"integer"},"size":{"default":"2K","description":"Output image resolution. 1K: 1024x1024. 2K: 2048x2048 (default). 4K: 4096x4096 (Pro only).","enum":["[\"1K\"","\"2K\"","\"4K\"]"],"type":"string"},"thinking_mode":{"default":true,"description":"Enhanced inference for better image quality at the cost of longer generation time. Default: true.","type":"boolean"}},"required":["prompt"],"type":"object"}},{"title":"Alibaba | Wan 2.7 | Pro | Image Edit","slug":"alibaba-wan-2-7-pro-image-edit","version":"0.0.1","output_type":"array","request_schema":{"properties":{"img_url":{"default":"false","description":"URL of the primary input image to edit. Supported formats: JPEG, JPG, PNG, BMP, WEBP. Max 20 MB.","type":"string"},"img_url_2":{"default":"false","description":"URL of a second reference image (optional).","type":"string"},"img_url_3":{"default":"false","description":"URL of a third reference image (optional).","type":"string"},"img_url_4":{"default":"false","description":"URL of a fourth reference image (optional).","type":"string"},"n":{"default":1,"description":"Number of images to generate. Range: 1-4. Each image is billed separately.","maximum":0,"minimum":0,"type":"integer"},"prompt":{"default":"false","description":"Text instruction describing the desired editing operation. Supports Chinese and English. Max 5,000 characters.","type":"string"},"seed":{"default":false,"description":"Seed for reproducibility. Same seed produces similar results. Random if omitted.","maximum":0,"minimum":0,"type":"integer"},"size":{"default":"2K","description":"Output image resolution. 1K: ~1024px. 2K: ~2048px (default). Aspect ratio matches last input image.","enum":["[\"1K\"","\"2K\"]"],"type":"string"}},"required":["prompt","img_url"],"type":"object"}},{"title":"Scale Video","slug":"scale-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"codec":{"default":"libx264","description":"Video codec to use for encoding. libx264 (H.264) is widely compatible, libx265 (H.265/HEVC) offers better compression.","enum":["libx264","libx265"],"type":"string"},"crf":{"default":18,"description":"Constant Rate Factor for quality (0-51). Lower values mean better quality and larger files. 18 is visually lossless for most content.","maximum":51,"minimum":0,"type":"integer"},"height":{"default":"","description":"Target height in pixels","maximum":4320,"minimum":2,"type":"integer"},"mode":{"default":"stretch","description":"Scaling mode. 'stretch' scales the video to the exact target dimensions (may distort aspect ratio). 'pad' scales to fit within the target dimensions while preserving aspect ratio, then pads with the chosen color to fill the remaining space (letterbox/pillarbox). 'crop' scales to cover the target dimensions while preserving aspect ratio, then center-crops to the exact target size.","enum":["stretch","pad","crop"],"type":"string"},"pad_color":{"default":"black","description":"Padding color when mode is 'pad'. Ignored for other modes.","enum":["black","white","red","green","blue","gray"],"type":"string"},"preset":{"default":"fast","description":"Encoding speed preset. Slower presets give better compression but take longer.","enum":["ultrafast","fast","medium","slow"],"type":"string"},"video_url":{"default":"","description":"URL of the video file to scale/resize. Height and Width of the video must be even numbers for compatibility with video codecs.","type":"string"},"width":{"default":"","description":"Target width in pixels","maximum":7680,"minimum":2,"type":"integer"}},"required":["width","height","video_url"],"type":"object"}},{"title":"Ltx v2.3 | Lipsync","slug":"ltx-v2-3-lipsync","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"XAI | Grok Imagine | Reference to Video","slug":"xai-grok-imagine-reference-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"XAI | Grok Imagine | Extend Video","slug":"xai-grok-imagine-extend-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{},"type":"object"}},{"title":"Sora 2 | Characters","slug":"sora-2-characters","version":"0.0.1","output_type":"text","request_schema":{"properties":{},"type":"object"}},{"title":"Pruna | P-Image Edit LoRA | Image Editing","slug":"p-image-edit-lora-image-edit","version":"0.0.1","output_type":"image","request_schema":{"properties":{"aspect_ratio":{"default":"match_input_image","description":"Output aspect ratio. match_input_image: match input image ratio (default).","enum":["match_input_image","1:1","16:9","9:16","4:3","3:4","3:2","2:3"],"type":"string"},"disable_safety_checker":{"default":false,"description":"Disable safety checker for generated images.","type":"boolean"},"hf_api_token":{"default":"false","description":"HuggingFace API token for accessing private LoRA repositories.","type":"string"},"images":{"default":false,"description":"Array of 1-5 image URLs for editing.","items":{"type":"image"},"maxItems":5,"minItems":1,"type":"array"},"lora_scale":{"default":1,"description":"LoRA strength (-1 to 3). Default 1 for edit LoRAs.","maximum":0,"minimum":0,"type":"number"},"lora_weights":{"default":"false","description":"HuggingFace URL to LoRA weights. Must be trained with p-image-edit-trainer.","type":"string"},"prompt":{"default":"false","description":"Text prompt describing the edit. Refer to images as image 1, image 2, etc.","type":"string"},"seed":{"default":false,"description":"Random seed for reproducible generation.","maximum":0,"minimum":0,"type":"integer"},"turbo":{"default":true,"description":"Faster optimizations. Set false for complex tasks.","type":"boolean"}},"required":["prompt","images"],"type":"object"}},{"title":"Pruna | P-Image LoRA | Text to Image","slug":"p-image-lora-text-to-image","version":"0.0.1","output_type":"image","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"Aspect ratio for the generated image. custom: use width/height for exact dimensions.","enum":["1:1","16:9","9:16","4:3","3:4","3:2","2:3","custom"],"type":"string"},"disable_safety_checker":{"default":false,"description":"Disable safety checker for generated images.","type":"boolean"},"height":{"default":false,"description":"Custom height in pixels (256-1440, multiple of 16). Only when aspect_ratio=custom.","maximum":0,"minimum":0,"type":"integer"},"hf_api_token":{"default":"false","description":"HuggingFace API token for accessing private LoRA repositories.","type":"string"},"lora_scale":{"default":0.5,"description":"LoRA strength (-1 to 3). 0.5 works well for most LoRAs.","maximum":0,"minimum":0,"type":"number"},"lora_weights":{"default":"false","description":"HuggingFace URL to LoRA weights. Format: huggingface.co/\u003cowner\u003e/\u003cmodel-name\u003e[/\u003cfile.safetensors\u003e]. Must be trained with p-image-trainer.","type":"string"},"prompt":{"default":"false","description":"Text description of the image to generate.","type":"string"},"prompt_upsampling":{"default":false,"description":"Upsample prompt with LLM for enhanced results.","type":"boolean"},"seed":{"default":false,"description":"Random seed for reproducible generation.","maximum":0,"minimum":0,"type":"integer"},"width":{"default":false,"description":"Custom width in pixels (256-1440, multiple of 16). Only when aspect_ratio=custom.","maximum":0,"minimum":0,"type":"integer"}},"required":["prompt"],"type":"object"}},{"title":"Google | Text to Speech","slug":"google-text-to-speech","version":"0.0.1","output_type":"audio","request_schema":{"properties":{},"type":"object"}},{"title":"Deepgram | Nova-3 | Speech to Text Pro","slug":"deepgram-nova-3-speech-to-text-pro","version":"0.0.1","output_type":"object","request_schema":{"properties":{"detect_entities":{"default":true,"description":"Extract named entities: persons, organizations, locations, dates, ordinals. Returns label, value, confidence per entity.","type":"boolean"},"diarize":{"default":true,"description":"Identify different speakers. Each word includes speaker ID and confidence score.","type":"boolean"},"filler_words":{"default":true,"description":"Include filler words (uh, um) in transcript. Useful for conversation analysis.","type":"boolean"},"intents":{"default":true,"description":"Detect speaker intents. Returns intent labels with confidence scores per segment.","type":"boolean"},"language_code":{"default":"auto","description":"Language of the audio (BCP-47). auto: automatic detection. multi: multilingual (up to 10 languages). 47+ languages supported.","enum":["auto","multi","tr","ur","en","nl","uk","es","ar","de","fr","it","ja","ko","pt","ru","zh","hi","bn","cs","da","fi","el","he","hu","id","ms","no","pl","ro","sk","sv","ta","te","th","vi"],"type":"string"},"media_url":{"default":"","description":"Audio file URL to transcribe. Supports mp3, wav, m4a, flac, ogg, webm, mp4, and 100+ audio formats.","type":"string"},"model":{"default":"nova-3","description":"Deepgram model. nova-3: latest, best accuracy. nova-2: previous generation.","enum":["nova-3","nova-2"],"type":"string"},"multichannel":{"default":false,"description":"Transcribe each audio channel independently. Max 5 channels.","type":"boolean"},"numerals":{"default":false,"description":"Convert spoken numbers to digits (e.g., three hundred to 300).","type":"boolean"},"paragraphs":{"default":true,"description":"Split transcript into paragraphs with sentences. Requires punctuate=true.","type":"boolean"},"profanity_filter":{"default":false,"description":"Replace profanity with asterisks in transcript.","type":"boolean"},"punctuate":{"default":true,"description":"Add punctuation and capitalization to transcript.","type":"boolean"},"redact":{"default":"false","description":"Redact sensitive info. false: none. pci: credit cards. pii: SSN/phone/email. numbers: all numeric sequences.","enum":["false","pci","pii","numbers"],"type":"string"},"sentiment":{"default":true,"description":"Analyze sentiment per segment (positive/negative/neutral with score) and overall average.","type":"boolean"},"smart_format":{"default":true,"description":"Auto-format currency, phone numbers, emails, dates for readability.","type":"boolean"},"summarize":{"default":true,"description":"Generate a short text summary of the transcript content.","type":"boolean"},"topics":{"default":true,"description":"Detect topics discussed. Returns topic labels with confidence scores per segment.","type":"boolean"},"utt_split":{"default":0.8,"description":"Seconds of silence to split utterances. Lower = more splits. Only when utterances=true. Default 0.8.","maximum":0,"minimum":0,"type":"number"},"utterances":{"default":true,"description":"Segment speech into semantic units. Returns array with start/end times, speaker, channel, transcript per utterance.","type":"boolean"}},"required":["media_url"],"type":"object"}},{"title":"Deepgram | Nova-3 | Speech to Text","slug":"deepgram-nova-3-speech-to-text","version":"0.0.1","output_type":"object","request_schema":{"properties":{"diarize":{"default":true,"description":"Identify different speakers in the audio. When enabled, each word includes a speaker ID (integer) and speaker_confidence score. Essential for multi-speaker audio like meetings, interviews, and phone calls.","type":"boolean"},"language_code":{"default":"auto","description":"Language of the audio (BCP-47 code). auto: automatic detection (recommended). multi: multilingual audio with up to 10 languages. tr: Turkish, ur: Urdu, en: English, nl: Dutch, uk: Ukrainian, es: Spanish, ar: Arabic, de: German, fr: French, it: Italian, ja: Japanese, ko: Korean, pt: Portuguese, ru: Russian, zh: Chinese, hi: Hindi. Nova-3 supports 47+ languages.","enum":["auto","multi","tr","ur","en","nl","uk","es","ar","de","fr","it","ja","ko","pt","ru","zh","hi","bn","cs","da","fi","el","he","hu","id","ms","no","pl","ro","sk","sv","ta","te","th","vi"],"type":"string"},"media_url":{"default":"","description":"Audio file URL to transcribe. Supports mp3, wav, m4a, flac, ogg, webm, mp4, and 100+ audio formats. The file must be publicly accessible or an EachLabs-uploaded file.","type":"string"},"model":{"default":"nova-3","description":"Deepgram speech recognition model. nova-3: latest generation, best accuracy, 47+ languages, recommended for all use cases. nova-2: previous generation, still available for backward compatibility.","enum":["nova-3","nova-2"],"type":"string"},"multichannel":{"default":false,"description":"Transcribe each audio channel independently. Enable when each channel contains a single speaker (e.g., stereo call recordings with one speaker per channel). Max 5 channels. Each word includes a channel index.","type":"boolean"},"punctuate":{"default":true,"description":"Add punctuation marks and capitalization to the transcript. Produces more readable output. Recommended for most use cases.","type":"boolean"},"smart_format":{"default":true,"description":"Auto-format currency amounts, phone numbers, email addresses, dates, and other entities for enhanced readability. Recommended for most use cases.","type":"boolean"}},"required":["media_url"],"type":"object"}},{"title":"xAI | Grok TTS | Text to Speech","slug":"xai-grok-tts-text-to-speech","version":"0.0.1","output_type":"audio","request_schema":{"properties":{"bit_rate":{"default":128000,"description":"MP3 bit rate in bps (MP3 codec only, ignored for other codecs). 32000: low/smallest file. 64000: medium/speech. 96000: standard. 128000: high quality (default). 192000: maximum fidelity.","maximum":0,"minimum":0,"type":"integer"},"output_format":{"default":"mp3","description":"Audio codec. mp3: general use, good compression (default). wav: lossless, for editing/post-production. pcm: raw 16-bit audio for processing pipelines. mulaw: G.711 mu-law for telephony. alaw: G.711 A-law for telephony.","enum":["mp3","wav","pcm","mulaw","alaw"],"type":"string"},"sample_rate":{"default":24000,"description":"Audio sample rate in Hz. 8000: narrowband telephony. 16000: wideband speech. 22050: standard balanced. 24000: high quality (default). 44100: CD quality. 48000: studio-grade.","maximum":0,"minimum":0,"type":"integer"},"text":{"default":"","description":"Text to convert to speech. Maximum 15000 characters. Supports 20+ languages (auto-detected). Inline speech tags: [pause], [long-pause], [laugh], [chuckle], [giggle], [cry], [tsk], [tongue-click], [lip-smack], [breath], [inhale], [exhale], [sigh], [hum-tune]. Wrapping tags: \u003cwhisper\u003e, \u003csoft\u003e, \u003cloud\u003e, \u003cslow\u003e, \u003cfast\u003e, \u003chigher-pitch\u003e, \u003clower-pitch\u003e, \u003csing-song\u003e, \u003csinging\u003e, \u003claugh-speak\u003e, \u003cemphasis\u003e, \u003cbuild-intensity\u003e, \u003cdecrease-intensity\u003e.","type":"string"},"voice_id":{"default":"eve","description":"Voice selection (case-insensitive). eve: energetic and upbeat, great for demos and announcements. ara: warm and friendly, ideal for conversational interfaces. rex: confident and clear, best for business and tutorials. sal: smooth and balanced, versatile for varied content. leo: authoritative and strong, perfect for instructional content.","enum":["eve","ara","rex","sal","leo"],"type":"string"}},"required":["text"],"type":"object"}},{"title":"Firered Image | Edit | v1.1","slug":"firered-image-edit-v1-1","version":"0.0.1","output_type":"array","request_schema":{"properties":{"acceleration":{"default":"regular","description":"The acceleration level to use for inference speed optimization.","enum":["none","regular","high"],"type":"string"},"enable_safety_checker":{"default":true,"description":"If set to true, the safety checker will be enabled.","type":"boolean"},"guidance_scale":{"default":4,"description":"Classifier-free guidance scale. Higher values make the model follow the prompt more closely.","maximum":10,"minimum":1,"type":"number"},"image_size":{"default":"square_hd","description":"The size of the generated image. If None, uses the input image dimensions.","enum":["square_hd","square","portrait_4_3","portrait_16_9","landscape_4_3","landscape_16_9"],"type":"string"},"image_urls":{"default":"","description":"The URLs of the images to edit. Supports single image editing and multi-image references (e.g., virtual try-on, style transfer from reference, portrait makeup).","items":{"type":"image"},"maxItems":10,"minItems":1,"type":"array"},"negative_prompt":{"default":"","description":"The negative prompt for the generation.","type":"string"},"num_images":{"default":1,"description":"The number of images to generate.","maximum":4,"minimum":1,"type":"integer"},"num_inference_steps":{"default":30,"description":"The number of inference steps to perform. More steps generally produce higher quality results.","maximum":50,"minimum":2,"type":"integer"},"output_format":{"default":"png","description":"The format of the generated image.","enum":["jpeg","png"],"type":"string"},"prompt":{"default":"","description":"The editing instruction describing what changes to make to the image. Supports both English and Chinese instructions.","type":"string"},"seed":{"default":"","description":"The same seed and the same prompt given to the same version of the model will output the same image every time.\n        ","type":"string"}},"required":["image_urls","prompt"],"type":"object"}},{"title":"Kling | o3 | Standard | Text to Video","slug":"kling-o3-standard-text-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"Aspect ratio of the generated video.","enum":["16:9","9:16","1:1"],"type":"string"},"duration":{"default":"8","description":"Video duration in seconds (3-15s).","enum":["3","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"generate_audio":{"default":true,"description":"Whether to generate native audio for the video.","type":"boolean"},"multi_prompt":{"default":"","description":"List of prompts for multi-shot video generation.","items":{"properties":{"duration":{"component":"select","name":"duration","options":[3,4,5,6,7,8,9,10,11,12,13,14,15],"order":1,"required":true,"title":"Duration","type":"string"},"prompt":{"component":"input","name":"prompt","order":0,"required":true,"title":"Prompt","type":"string"}},"required":["prompt","duration"],"type":"object"},"maxItems":5,"type":"array"},"prompt":{"default":"","description":"Text prompt for video generation. Required unless multi_prompt is provided.","type":"string"},"shot_type":{"default":"customize","description":"The type of multi-shot video generation.","type":"string"},"voice_ids":{"default":"","description":"Optional Voice IDs for video generation. Reference voices in your prompt with \u003c\u003c\u003cvoice_1\u003e\u003e\u003e and \u003c\u003c\u003cvoice_2\u003e\u003e\u003e (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://www.eachlabs.ai/kling/kling-voice/kling-voice-create","items":{"type":"string"},"maxItems":2,"type":"array"}},"type":"object"}},{"title":"Kling | v3 | Standard | Motion Control","slug":"kling-v3-standard-motion-control","version":"0.0.1","output_type":"video","request_schema":{"properties":{"character_orientation":{"default":"video","description":"Controls whether the output character's orientation matches the reference image or video. 'video': orientation matches reference video - better for complex motions (max 30s). 'image': orientation matches reference image - better for following camera movements (max 10s).","enum":["image","video"],"type":"string"},"elements":{"default":false,"description":"Optional element for facial consistency binding. Upload a facial element to enhance identity preservation in the generated video. Only 1 element is supported. Reference in prompt as @Element1. Element binding is only supported when character_orientation is 'video'.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":true,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls","frontal_image_url"],"type":"object"},"maxItems":1,"type":"array"},"image_url":{"default":"false","description":"Reference image URL. The characters, backgrounds, and other elements in the generated video are based on this reference image. Characters should have clear body proportions, avoid occlusion, and occupy more than 5% of the image area.","type":"string"},"keep_original_sound":{"default":true,"description":"Whether to keep the original sound from the reference video.","type":"boolean"},"prompt":{"default":"false","type":"string"},"video_url":{"default":"false","description":"Reference video URL. The character actions in the generated video will be consistent with this reference video. Should contain a realistic style character with entire body or upper body visible, including head, without obstruction. Duration limit depends on character_orientation: 10s max for 'image', 30s max for 'video'.","type":"string"}},"required":["character_orientation","video_url","image_url"],"type":"object"}},{"title":"Kling | v3 | Pro | Motion Control","slug":"kling-v3-pro-motion-control","version":"0.0.1","output_type":"video","request_schema":{"properties":{"character_orientation":{"default":"video","description":"Controls whether the output character's orientation matches the reference image or video. 'video': orientation matches reference video - better for complex motions (max 30s). 'image': orientation matches reference image - better for following camera movements (max 10s).","enum":["image","video"],"type":"string"},"elements":{"default":false,"description":"Optional element for facial consistency binding. Upload a facial element to enhance identity preservation in the generated video. Only 1 element is supported. Reference in prompt as @Element1. Element binding is only supported when character_orientation is 'video'.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":true,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls","frontal_image_url"],"type":"object"},"maxItems":1,"type":"array"},"image_url":{"default":"false","description":"Reference image URL. The characters, backgrounds, and other elements in the generated video are based on this reference image. Characters should have clear body proportions, avoid occlusion, and occupy more than 5% of the image area.","type":"string"},"keep_original_sound":{"default":true,"description":"Whether to keep the original sound from the reference video. ","type":"boolean"},"prompt":{"default":"false","type":"string"},"video_url":{"default":"false","description":"Reference video URL. The character actions in the generated video will be consistent with this reference video. Should contain a realistic style character with entire body or upper body visible, including head, without obstruction. Duration limit depends on character_orientation: 10s max for 'image', 30s max for 'video'.","type":"string"}},"required":["character_orientation","video_url","image_url"],"type":"object"}},{"title":"P video","slug":"p-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"An enumeration.","enum":["16:9","9:16","4:3","3:4","3:2","2:3","1:1"],"type":"string"},"audio":{"default":"","description":"Input audio to condition video generation. Supports flac, mp3, wav.","type":"string"},"draft":{"default":false,"description":"Draft mode. Generates a lower-quality preview of the video.","type":"boolean"},"duration":{"default":9,"description":"Duration of the video in seconds (1-10). Ignored when audio is provided.","maximum":10,"minimum":1,"type":"integer"},"fps":{"default":24,"description":"An enumeration.","maximum":0,"minimum":0,"type":"integer"},"image":{"default":"","description":"Input image to generate video from (image-to-video). Supports jpg, jpeg, png, webp.","type":"string"},"prompt":{"default":"","description":"Text prompt for video generation.","type":"string"},"prompt_upsampling":{"default":true,"description":"Use prompt upsampling to enhance the prompt.","type":"boolean"},"resolution":{"default":"720p","description":"An enumeration.","enum":["720p","1080p"],"type":"string"},"save_audio":{"default":true,"description":"Save the video with audio.","type":"boolean"},"seed":{"default":"","description":"Random seed. Set for reproducible generation.","maximum":0,"minimum":0,"type":"integer"}},"required":["prompt"],"type":"object"}},{"title":"Nano Banana 2 | Edit","slug":"nano-banana-2-edit","version":"0.0.1","output_type":"array","request_schema":{"properties":{"aspect_ratio":{"default":"1:1","description":"The aspect ratio of the generated image.","enum":["1:1","3:2","2:3","3:4","4:3","4:5","5:4","9:16","16:9","21:9"],"type":"string"},"image_urls":{"default":"","description":"The URLs of the images to use for image-to-image generation or image editing.","items":{"type":"image"},"maxItems":10,"minItems":1,"type":"array"},"limit_generations":{"default":true,"description":"Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to to disregard any instructions in the prompt regarding the number of images to generate.","type":"boolean"},"num_images":{"default":1,"description":"The number of images to generate.","maximum":4,"minimum":1,"type":"integer"},"output_format":{"default":"png","description":"The format of the generated image.","enum":["jpeg","png","webp"],"type":"string"},"prompt":{"default":"","description":"The prompt for image editing.","type":"string"},"resolution":{"default":"1K","description":"The resolution of the image to generate.","enum":["1K","2K","4K"],"type":"string"}},"required":["prompt","image_urls"],"type":"object"}},{"title":"Nano Banana 2 | Text to Image","slug":"nano-banana-2-text-to-image","version":"0.0.1","output_type":"array","request_schema":{"properties":{"aspect_ratio":{"default":"1:1","description":"Aspect Ratio","enum":["1:1","3:2","2:3","3:4","4:3","4:5","5:4","9:16","16:9","21:9"],"type":"string"},"limit_generations":{"default":true,"description":"Experimental parameter to limit the number of generations from each round of prompting to 1. Set to `True` to to disregard any instructions in the prompt regarding the number of images to generate.","type":"boolean"},"num_images":{"default":1,"description":"The number of images to generate.","maximum":4,"minimum":1,"type":"integer"},"output_format":{"default":"png","description":"The format of the generated image.","enum":["jpeg","png","webp"],"type":"string"},"prompt":{"default":"","description":"The text prompt to generate an image from.","type":"string"},"resolution":{"default":"1K","description":"The resolution of the image to generate.","enum":["1K","2K","4K"],"type":"string"}},"required":["prompt"],"type":"object"}},{"title":"Bytedance | Seedream | v5 | Lite | Text to Image","slug":"bytedance-seedream-v5-lite-text-to-image","version":"0.0.1","output_type":"array","request_schema":{"properties":{},"type":"object"}},{"title":"Bytedance | Seedream | v5 | Lite | Edit","slug":"bytedance-seedream-v5-lite-edit","version":"0.0.1","output_type":"array","request_schema":{"properties":{},"type":"object"}},{"title":"Kling | v3 | Image to Image","slug":"kling-v3-image-to-image","version":"0.0.1","output_type":"array","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"Aspect ratio of generated images.","enum":["16:9","9:16","1:1","4:3","3:4","3:2","2:3","21:9"],"type":"string"},"elements":{"default":false,"description":"Optional: Elements (characters/objects) to include in the image for face control.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":false,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls"],"type":"object"},"maxItems":10,"type":"array"},"image_url":{"default":"false","description":"Reference image for image-to-image generation.","type":"string"},"num_images":{"default":1,"description":"Number of images to generate (1-9)","maximum":9,"minimum":1,"type":"integer"},"output_format":{"default":"png","description":"The format of the generated image.","enum":["jpeg","png","webp"],"type":"string"},"prompt":{"default":"false","description":"Text prompt for image generation. Max 2500 characters.","type":"string"},"resolution":{"default":"1K","description":"Image generation resolution. 1K: standard, 2K: high-res.","enum":["1K","2K"],"type":"string"}},"required":["image_url","prompt"],"type":"object"}},{"title":"Kling | v3 | Text to Image","slug":"kling-v3-text-to-image","version":"0.0.1","output_type":"array","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"Aspect ratio of generated images.","enum":["16:9","9:16","1:1","4:3","3:4","3:2","2:3","21:9"],"type":"string"},"elements":{"default":false,"description":"Optional: Elements (characters/objects) to include in the image for face control. Each element can have a frontal image and optionally reference images.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":false,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls"],"type":"object"},"maxItems":10,"type":"array"},"negative_prompt":{"default":"false","description":"Negative text prompt. It is recommended to supplement negative prompt information through negative sentences directly within positive prompts.","type":"string"},"num_images":{"default":1,"description":"Number of images to generate (1-9).","maximum":9,"minimum":1,"type":"integer"},"output_format":{"default":"png","description":"The format of the generated image.","enum":["jpeg","png","webp"],"type":"string"},"prompt":{"default":"false","description":"Text prompt for image generation. Max 2500 characters.","type":"string"},"resolution":{"default":"1K","description":"Image generation resolution. 1K: standard, 2K: high-res","enum":["1K","2K"],"type":"string"}},"required":["prompt"],"type":"object"}},{"title":"Kling | o3 | Pro | Video to Video | Reference","slug":"kling-o3-pro-video-to-video-reference","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"Aspect ratio.","enum":["16:9","9:16","1:1"],"type":"string"},"duration":{"default":"5","description":"Video duration in seconds (3-15s for reference video).","enum":["3","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"elements":{"default":false,"description":"Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":true,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls","frontal_image_url"],"type":"object"},"maxItems":4,"type":"array"},"image_urls":{"default":false,"description":"Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.","items":{"type":"image"},"maxItems":4,"type":"array"},"keep_audio":{"default":true,"description":"Whether to keep the original audio from the reference video.","type":"boolean"},"prompt":{"default":"false","description":"Text prompt for video generation. Reference video as @Video1.","type":"string"},"shot_type":{"default":"customize","description":"The type of multi-shot video generation.","enum":["customize"],"type":"string"},"video_url":{"default":"false","description":"Reference video URL. Only .mp4/.mov formats, 3-10s duration, 720-2160px resolution, max 200MB.","type":"string"}},"required":["video_url","prompt"],"type":"object"}},{"title":"Kling | o3 | Standard | Video to Video | Reference","slug":"kling-o3-standard-video-to-video-reference","version":"0.0.1","output_type":"video","request_schema":{"properties":{"aspect_ratio":{"default":"16:9","description":"Aspect ratio.","enum":["16:9","9:16","1:1"],"type":"string"},"duration":{"default":"5","description":"Video duration in seconds (3-15s for reference video).","enum":["3","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"elements":{"default":false,"description":"Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":true,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls","frontal_image_url"],"type":"object"},"maxItems":4,"type":"array"},"image_urls":{"default":false,"description":"Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.","items":{"type":"image"},"maxItems":4,"type":"array"},"keep_audio":{"default":true,"description":"Whether to keep the original audio from the reference video.","type":"boolean"},"prompt":{"default":"false","description":"Text prompt for video generation. Reference video as @Video1.","type":"string"},"shot_type":{"default":"customize","description":"The type of multi-shot video generation.","enum":["customize"],"type":"string"},"video_url":{"default":"false","description":"Reference video URL. Only .mp4/.mov formats, 3-10s duration, 720-2160px resolution, max 200MB.","type":"string"}},"required":["video_url","prompt"],"type":"object"}},{"title":"Kling | o3 | Pro | Video to Video | Edit","slug":"kling-o3-pro-video-to-video-edit","version":"0.0.1","output_type":"video","request_schema":{"properties":{"elements":{"default":false,"description":"Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":true,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls","frontal_image_url"],"type":"object"},"maxItems":4,"type":"array"},"image_urls":{"default":false,"description":"Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.","items":{"type":"image"},"maxItems":4,"type":"array"},"keep_audio":{"default":true,"description":"Whether to keep the original audio from the reference video.","type":"boolean"},"prompt":{"default":"false","description":"Text prompt for video generation. Reference video as @Video1.","type":"string"},"shot_type":{"default":"customize","description":"The type of multi-shot video generation.","enum":["customize"],"type":"string"},"video_url":{"default":"false","description":"Reference video URL. Only .mp4/.mov formats, 3-10s duration, 720-2160px resolution, max 200MB.","type":"string"}},"required":["video_url","prompt"],"type":"object"}},{"title":"Kling | o3 | Standard | Video to Video | Edit","slug":"kling-o3-standard-video-to-video-edit","version":"0.0.1","output_type":"video","request_schema":{"properties":{"elements":{"default":false,"description":"Elements (characters/objects) to include. Reference in prompt as @Element1, @Element2.","items":{"properties":{"frontal_image_url":{"component":"image","name":"frontal_image_url","order":2,"required":true,"title":"frontal_image_url","type":"string"},"reference_image_urls":{"array_items_json":"{\n  \"type\": \"image\"\n}","component":"array_input","maximum":3,"minimum":1,"name":"reference_image_urls","order":1,"required":true,"title":"reference_image_urls","type":"array"}},"required":["reference_image_urls","frontal_image_url"],"type":"object"},"maxItems":4,"type":"array"},"image_urls":{"default":false,"description":"Reference images for style/appearance. Reference in prompt as @Image1, @Image2, etc. Maximum 4 total (elements + reference images) when using video.","items":{"type":"image"},"maxItems":4,"type":"array"},"keep_audio":{"default":true,"description":"Whether to keep the original audio from the reference video.","type":"boolean"},"prompt":{"default":"false","description":"Text prompt for video generation. Reference video as @Video1.","type":"string"},"shot_type":{"default":"customize","description":"The type of multi-shot video generation.","enum":["customize"],"type":"string"},"video_url":{"default":"false","description":"Reference video URL. Only .mp4/.mov formats, 3-10s duration, 720-2160px resolution, max 200MB.","type":"string"}},"required":["video_url","prompt"],"type":"object"}},{"title":"Kling | o3 | Standard | Image to Video","slug":"kling-o3-standard-image-to-video","version":"0.0.1","output_type":"video","request_schema":{"properties":{"duration":{"default":"5","description":"Video duration in seconds (3-15s).","enum":["3","4","5","6","7","8","9","10","11","12","13","14","15"],"type":"string"},"end_image_url":{"default":"","description":"URL of the end frame image (optional). Note: This field is not used when multi-prompt is selected.","type":"string"},"generate_audio":{"default":true,"description":"Whether to generate native audio for the video.","type":"boolean"},"image_url":{"default":"","description":"URL of the start frame image.","type":"string"},"multi_prompt":{"default":"","description":"List of prompts for multi-shot video generation.","items":{"properties":{"duration":{"component":"select","name":"duration","options":[3,4,5,6,7,8,9,10,11,12,13,14,15],"order":1,"required":true,"title":"Duration","type":"string"},"prompt":{"component":"input","name":"prompt","order":0,"required":true,"title":"Prompt","type":"string"}},"required":["prompt","duration"],"type":"object"},"maxItems":5,"type":"array"},"prompt":{"default":"","description":"Text prompt for video generation. Either prompt or multi_prompt must be provided, but not both.","type":"string"},"shot_type":{"default":"customize","description":"The type of multi-shot video generation.","enum":["customize"],"type":"string"}},"required":["image_url"],"type":"object"}}]
