import os
import base64
import io
import json
import math
import time
import tempfile
import zipfile
from typing import List, Optional, Tuple, Dict, Any

from PIL import Image
import cv2
import numpy as np

from google import genai
from google.genai import types
from dotenv import load_dotenv

# Load environment variables from .env file if present
load_dotenv()

# Initialize the API key from the environment.
# Users should set GEMINI_API_KEY in their environment variables.
API_KEY = os.environ.get("GEMINI_API_KEY")


def _get_client():
    if not API_KEY:
        raise ValueError("GEMINI_API_KEY environment variable is not set.")
    return genai.Client(api_key=API_KEY)
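# Example environment setup (illustrative only; any standard way of providing
# the variable works, and the key value below is a placeholder):
#
#     export GEMINI_API_KEY="your-key-here"          # shell
#     # ...or put GEMINI_API_KEY=your-key-here in a .env file next to this module.
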

def generate_pixel_character(
    prompt_text: str,
    reference_image_b64: Optional[str] = None
) -> str:
    """
    Generates a static 2D sprite character from a text description in any art style.

    Args:
        prompt_text: Description of the character and style
            (e.g., "A cute cat wizard, cartoon style" or "anime cat hero").
        reference_image_b64: Optional base64 string of a reference image to influence the style.

    Returns:
        A base64 string of the generated PNG image.
    """
    client = _get_client()

    base_instructions = (
        "Generate a 2D game sprite character. The character should be flat 2D art, NOT 3D, "
        "NOT photorealistic. Character should be side view, full body, centered on a solid white background. "
        "No shadows, no 3D effects, no depth. The style should match what the user describes "
        "(pixel art, anime, cartoon, hand-drawn, vector, etc.). If no style is specified, use a clean cartoon style. "
        "Do NOT add text, UI, or watermarks."
    )
    full_prompt = f"{prompt_text}\n\n{base_instructions}" if prompt_text else base_instructions

    # Gemini 2.5 Flash Image supports image output via generate_content.
    model_id = "gemini-2.5-flash-image"

    contents = [types.Content(parts=[types.Part.from_text(text=full_prompt)])]
    if reference_image_b64:
        # Remove the data URL prefix if present.
        if "," in reference_image_b64:
            reference_image_b64 = reference_image_b64.split(",", 1)[1]
        image_bytes = base64.b64decode(reference_image_b64)
        contents[0].parts.append(types.Part.from_bytes(data=image_bytes, mime_type="image/png"))

    try:
        response = client.models.generate_content(
            model=model_id,
            contents=contents,
            config=types.GenerateContentConfig(
                temperature=0.3,
                max_output_tokens=8192
            )
        )
        if not response.candidates or not response.candidates[0].content.parts:
            raise RuntimeError("No content generated by the model.")
        for part in response.candidates[0].content.parts:
            if part.inline_data:
                # Return the raw base64 of the PNG bytes.
                return base64.b64encode(part.inline_data.data).decode('utf-8')
        raise RuntimeError("Model returned content but no image data found.")
    except Exception as e:
        raise RuntimeError(f"Failed to generate character: {str(e)}")

def animate_pixel_character(
    sprite_b64: str,
    animation_type: str = "idle",
    extra_prompt: str = ""
) -> str:
    """
    Animates a static 2D sprite using Google's Veo model.

    Args:
        sprite_b64: Base64 string of the input static sprite (PNG).
        animation_type: One of "idle", "walk", "run", "jump".
        extra_prompt: Optional additional instructions for the motion.

    Returns:
        A base64 string of the generated video (MP4).
    """
    client = _get_client()

    # Clean the base64 payload (strip a data URL prefix if present).
    if "," in sprite_b64:
        sprite_b64 = sprite_b64.split(",", 1)[1]
    sprite_bytes = base64.b64decode(sprite_b64)

    # Prompt construction (ported from the original project).
    base_style = """
CRITICAL STYLE REQUIREMENTS - YOU MUST FOLLOW THESE EXACTLY:
- This is a 2D game sprite animation. The input image is a 2D sprite.
- The clip should be about 4 seconds long so it can be used as a game sprite animation.
- You MUST maintain the EXACT same 2D art style as the input image (pixel art, anime, cartoon, etc.).
- DO NOT make it 3D, DO NOT make it realistic, DO NOT add depth, DO NOT add shadows.
- DO NOT add lighting, DO NOT add gradients, DO NOT add shine or gloss.
- DO NOT render it in 3D style, DO NOT make it photorealistic.
- The character must remain flat 2D throughout the entire animation, preserving the original art style.
- Match the visual style, color palette, and aesthetic of the input image exactly.
- Use a solid pure white background (#FFFFFF), completely flat, no gradient, no textures, no shadows, no ground line, no objects.
- The character must stay centered, side view, and fill a reasonable portion of the frame.
- No text, UI, logos, borders, or props.
""".strip()
    motion_prompts = {
        "idle": """
ANIMATION TYPE: IDLE
- The character must stand completely still in place.
- Only animate a very subtle breathing motion: tiny up/down movement of the chest.
- Optional: very slight idle sway (left/right) of the body, but minimal.
- NO walking, NO movement across the screen, NO leg movement.
- The animation should be a single seamless idle loop that starts and ends in almost the same pose so it can be looped cleanly.
- Do not add any extra actions or transitions after the idle cycle finishes.
- Keep the animation loop smooth and subtle.
""",
        "walk": """
ANIMATION TYPE: WALK CYCLE
- Animate a classic 2D side-scrolling walk cycle in place, side view.
- The character walks on the spot (does NOT move across the screen).
- Show clear leg alternation: left leg forward, right leg back, then switch.
- Arms should swing opposite to legs (left arm forward when right leg forward).
- The character's body should have a slight up/down bounce as they walk.
- The video should contain one clean walk cycle that returns to the starting pose so it can loop seamlessly.
- Avoid extra camera movement or additional actions at the end of the clip.
- The character must stay centered in the frame throughout.
""",
        "run": """
ANIMATION TYPE: RUN CYCLE
- Animate a faster run cycle in place, side view.
- The character runs faster than walking with more exaggerated motion.
- Legs move faster with longer strides.
- Arms pump more vigorously than walking.
- Body has more pronounced up/down bounce.
- The character stays centered and runs on the spot, like a classic game sprite.
- The clip should be a single, smooth run cycle that ends in nearly the same pose as the first frame for looping.
- Do not add extra motions or transitions after the run cycle is complete.
""",
        "jump": """
ANIMATION TYPE: JUMP CYCLE
- Animate a complete jump cycle in place: anticipation (squat down), jump up, hang time at peak, fall down, land, then settle back into the starting pose.
- The character should compress slightly before jumping (anticipation).
- At the peak of the jump, there should be a brief hang time.
- The landing should have a slight compression/squat.
- The clip should contain one complete jump cycle that ends very close to the initial idle pose, so it can be looped with other animations.
- Avoid extra steps, actions, or camera moves after the landing.
- NO camera movement, keep the character centered throughout.
"""
    }

    selected_motion = motion_prompts.get(animation_type, motion_prompts["idle"]).strip()
    extra = f"\nAdditional user instruction: {extra_prompt.strip()}" if extra_prompt else ""
    full_prompt = f"{base_style}\n\n{selected_motion}\n\n{extra}"
    # Fallback chain for Veo models.
    veo_models = [
        "veo-3.1-fast-generate-preview",  # Currently the best fit for this use case
        "veo-3.1-generate-preview"
    ]

    last_error = None
    for model_name in veo_models:
        try:
            print(f"Attempting animation with model: {model_name}")
            operation = client.models.generate_videos(
                model=model_name,
                prompt=full_prompt,
                image=types.Image(
                    image_bytes=sprite_bytes,
                    mime_type="image/png"
                ),
                config=types.GenerateVideosConfig(
                    aspect_ratio="16:9",
                )
            )

            # generate_videos returns a long-running operation; poll until it is done.
            while not operation.done:
                time.sleep(10)
                operation = client.operations.get(operation)

            # Extract the video from the completed operation.
            if operation.response and operation.response.generated_videos:
                video = operation.response.generated_videos[0]
                video_obj = video.video

                # Try to get the bytes directly.
                if hasattr(video_obj, 'video_bytes') and video_obj.video_bytes:
                    return base64.b64encode(video_obj.video_bytes).decode('utf-8')
                # Some SDK versions use a 'data' attribute.
                if hasattr(video_obj, 'data') and video_obj.data:
                    return base64.b64encode(video_obj.data).decode('utf-8')
                # If the video only has a URI, download it via the client's files API.
                if hasattr(video_obj, 'uri') and video_obj.uri:
                    downloaded = client.files.download(file=video_obj)
                    if hasattr(downloaded, 'read'):
                        return base64.b64encode(downloaded.read()).decode('utf-8')
                    elif isinstance(downloaded, bytes):
                        return base64.b64encode(downloaded).decode('utf-8')
                # Try direct bytes access (varies by SDK version).
                if hasattr(video_obj, 'bytes') and video_obj.bytes:
                    return base64.b64encode(video_obj.bytes).decode('utf-8')
                raise RuntimeError("Video generated but could not extract bytes from response")

            # Some SDK versions expose the payload on operation.result instead.
            if hasattr(operation, 'result') and operation.result:
                if hasattr(operation.result, 'generated_videos') and operation.result.generated_videos:
                    video = operation.result.generated_videos[0]
                    video_obj = video.video
                    if hasattr(video_obj, 'video_bytes') and video_obj.video_bytes:
                        return base64.b64encode(video_obj.video_bytes).decode('utf-8')
                    if hasattr(video_obj, 'bytes') and video_obj.bytes:
                        return base64.b64encode(video_obj.bytes).decode('utf-8')

            raise RuntimeError("Operation completed but no video data found in response")
        except Exception as e:
            print(f"Model {model_name} failed: {e}")
            last_error = e
            continue

    raise RuntimeError(f"All animation models failed. Last error: {last_error}")

def extract_sprite_frames(
    video_b64: str,
    fps: int = 8
) -> Tuple[str, List[str]]:
    """
    Extracts frames from an MP4 video and returns a ZIP file plus a list of frame images.

    Uses OpenCV for fast in-memory video decoding without subprocess overhead.

    Args:
        video_b64: Base64 string of the MP4 video.
        fps: Frames per second to extract (default 8).

    Returns:
        Tuple containing:
            1. Base64 string of the ZIP file containing all frames.
            2. List of base64 strings for the individual frames (for preview).
    """
    if "," in video_b64:
        video_b64 = video_b64.split(",", 1)[1]
    video_bytes = base64.b64decode(video_b64)

    # OpenCV requires a file path, so we use a minimal temp file.
    # This is still faster than subprocess + multiple disk writes.
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
        tmp.write(video_bytes)
        tmp_path = tmp.name

    try:
        cap = cv2.VideoCapture(tmp_path)
        if not cap.isOpened():
            raise RuntimeError("Failed to open video file")

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        if video_fps <= 0:
            video_fps = 30  # Fallback when the container reports no FPS

        # Calculate the frame interval needed to achieve the target fps.
        frame_interval = max(1, int(round(video_fps / fps)))

        frames_b64 = []
        png_buffers = []
        frame_idx = 0
        extracted_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Keep only every frame_interval-th frame.
            if frame_idx % frame_interval == 0:
                # Convert BGR (OpenCV) to RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(frame_rgb)
                # Encode to PNG in memory with fast compression.
                buffer = io.BytesIO()
                img.save(buffer, format='PNG', optimize=False, compress_level=1)
                png_data = buffer.getvalue()
                frames_b64.append(base64.b64encode(png_data).decode('utf-8'))
                png_buffers.append((f"frame_{extracted_count:03d}.png", png_data))
                extracted_count += 1
            frame_idx += 1

        cap.release()

        if extracted_count == 0:
            raise RuntimeError("No frames extracted from video")

        # Create the ZIP in memory.
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED, compresslevel=1) as zf:
            for filename, png_data in png_buffers:
                zf.writestr(filename, png_data)
        zip_b64 = base64.b64encode(zip_buffer.getvalue()).decode('utf-8')

        return zip_b64, frames_b64
    finally:
        # Clean up the temp file.
        os.unlink(tmp_path)
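

if __name__ == "__main__":
    # Minimal end-to-end sketch of the pipeline (illustrative only): assumes
    # GEMINI_API_KEY is set; the prompt and output filename are arbitrary
    # examples, and both the image and video steps call billable Gemini APIs.
    sprite = generate_pixel_character("A knight with a sword, pixel art style")
    video = animate_pixel_character(sprite, animation_type="idle")
    zip_b64, frames = extract_sprite_frames(video, fps=8)
    with open("sprite_frames.zip", "wb") as f:
        f.write(base64.b64decode(zip_b64))
    print(f"Extracted {len(frames)} frames into sprite_frames.zip")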