Spaces:
Runtime error
Runtime error
import gradio as gr
import torch
from diffusers import DiffusionPipeline

# ---------------------- MODEL INITIALIZATION ----------------------
# Diffusers pipelines only support the "balanced" device_map strategy;
# passing "cpu" as a device_map raises a ValueError at load time. On a
# CPU-only Space we therefore omit device_map entirely and let the
# pipeline default to CPU placement.
_HAS_CUDA = torch.cuda.is_available()
# fp16 halves memory on GPU; fp16 on CPU is slow/unsupported for many ops.
_DTYPE = torch.float16 if _HAS_CUDA else torch.float32
_DEVICE_MAP = "balanced" if _HAS_CUDA else None

# FLUX-Kontext pipeline: stylizes the input image from a text prompt.
flux_model = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=_DTYPE,
    device_map=_DEVICE_MAP,
)

# OmniAvatar pipeline: animates a still image with lip-synced audio.
omni_model = DiffusionPipeline.from_pretrained(
    "tencent/OmniAvatar",
    torch_dtype=_DTYPE,
    device_map=_DEVICE_MAP,
)
# ---------------------- MAIN GENERATION FUNCTION ----------------------
def generate_video(image, audio, prompt, style="claymation"):
    """Stylize ``image`` with FLUX-Kontext, then animate it with OmniAvatar.

    Parameters
    ----------
    image : str
        Filepath of the character image (gradio component uses ``type="filepath"``).
    audio : str
        Filepath of the driving voice audio.
    prompt : str
        Text prompt guiding the stylization pass.
    style : str, optional
        Style tag forwarded to OmniAvatar (default ``"claymation"``).

    Returns
    -------
    The generated video produced by the OmniAvatar pipeline.

    Raises
    ------
    gr.Error
        On any generation failure or unexpected pipeline output, so the UI
        shows a proper error toast. (Returning an error *string* to the
        ``gr.Video`` output component cannot be rendered.)
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Pipelines loaded with a device_map are dispatched by accelerate and
    # must NOT be moved with .to() — doing so raises an error. Only move
    # plain (CPU-loaded) pipelines.
    for pipe in (flux_model, omni_model):
        if not getattr(pipe, "hf_device_map", None):
            pipe.to(device)
    try:
        # Step 1: stylize the input image using FLUX-Kontext.
        stylized_image = flux_model(
            prompt=prompt,
            image=image,
            guidance_scale=7.5,
            num_inference_steps=30,
        ).images[0]
        # Step 2: animate the stylized image with OmniAvatar.
        result = omni_model(
            image=stylized_image,
            audio=audio,
            style=style,
        )
    except Exception as e:
        # Surface the failure in the UI instead of feeding a string into
        # the video output component, which would fail silently.
        raise gr.Error(f"⚠️ Error during generation: {e}") from e
    # Normalize the pipeline output: dict-style or attribute-style results.
    if isinstance(result, dict) and "video" in result:
        return result["video"]
    if hasattr(result, "videos"):
        return result.videos[0]
    raise gr.Error(f"⚠️ Unexpected output format: {type(result)}")
# ---------------------- GRADIO UI ----------------------
# NOTE(review): the original file's emoji were mojibake (UTF-8 bytes decoded
# as ISO-8859-7, e.g. "π¬" for 🎬); restored to the intended characters.
with gr.Blocks(title="🎭 Claymation Talking Avatar Generator") as demo:
    gr.Markdown("""
    # 🎬 Claymation Talking Avatar Generator
    Generate claymation-style speaking avatars using **FLUX-Kontext** for stylization
    and **OmniAvatar** for lip-synced animation.
    """)

    # Input media: character image + driving voice audio, side by side.
    # Both use type="filepath" so generate_video receives paths, not arrays.
    with gr.Row():
        image_input = gr.Image(label="🧍 Upload Character Image", type="filepath")
        audio_input = gr.Audio(label="🎤 Upload Voice Audio", type="filepath")

    prompt = gr.Textbox(
        label="📝 Prompt (Optional)",
        value="A claymation character speaking realistically",
        placeholder="Describe the style or mood...",
    )

    # Style selector and trigger button on one row.
    with gr.Row():
        style_dropdown = gr.Dropdown(
            choices=["claymation", "toon", "realistic"],
            value="claymation",
            label="🎨 Style",
        )
        generate_button = gr.Button("🚀 Generate Video")

    video_output = gr.Video(label="🎥 Generated Output")

    generate_button.click(
        fn=generate_video,
        inputs=[image_input, audio_input, prompt, style_dropdown],
        outputs=video_output,
    )

# ---------------------- LAUNCH ----------------------
# queue() is required on Spaces for long-running generation jobs.
demo.queue().launch(debug=True, share=False)