import gradio as gr
from diffusers import DiffusionPipeline
import torch

# Ensure the necessary libraries are installed:
#   pip install diffusers --upgrade
#   pip install invisible_watermark transformers accelerate safetensors gradio torch

model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Determine device and dtype
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.float16
    print("Using CUDA (GPU).")
# elif torch.backends.mps.is_available():  # Uncomment for macOS Metal support
#     device = "mps"
#     dtype = torch.float16
#     print("Using MPS (Apple Silicon GPU).")
else:
    device = "cpu"
    dtype = torch.float32
    print("Using CPU.")

# Load the Stable Diffusion XL pipeline.
# float16 and safetensors are used for efficiency on GPU;
# variant="fp16" loads the fp16 weights.
try:
    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=dtype,
        use_safetensors=True,
        variant="fp16" if device != "cpu" else None,  # Only use the fp16 variant when not on CPU
    )
    pipe.to(device)

    # Optional: enable CPU offloading if VRAM is limited (only works on CUDA)
    if device == "cuda":
        try:
            # Check VRAM - this is a rough estimate; adjust the threshold as needed
            total_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            if total_vram_gb < 10:  # Example threshold: less than 10 GB VRAM
                print(f"Low VRAM ({total_vram_gb:.2f} GB detected). Enabling model CPU offload.")
                pipe.enable_model_cpu_offload()
        except Exception as offload_err:
            print(f"Could not check VRAM or enable offload: {offload_err}")

    # Optional: use torch.compile for a speedup (requires torch >= 2.0)
    # if device != "cpu" and hasattr(torch, "compile"):
    #     try:
    #         print("Attempting to compile the UNet...")
    #         pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    #         print("UNet compiled successfully.")
    #     except Exception as compile_err:
    #         print(f"Torch compile failed: {compile_err}")

    print(f"SDXL pipeline loaded successfully on {device}.")
except Exception as e:
    print(f"Error loading SDXL pipeline: {e}")
    pipe = None


def generate_image(prompt):
    """Generates an image based on the text prompt."""
    if pipe is None:
        # Handle the case where the pipeline failed to load:
        # return a placeholder image carrying an error message
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new("RGB", (512, 512), color=(200, 200, 200))
        d = ImageDraw.Draw(img)
        try:
            # Try to load a default font
            fnt = ImageFont.truetype("arial.ttf", 15)
        except IOError:
            fnt = ImageFont.load_default()
        d.text((10, 10), "Error: Model pipeline failed to load.", fill=(255, 0, 0), font=fnt)
        return img

    if not prompt:
        return None  # Return nothing if the prompt is empty

    print(f"Generating image for prompt: '{prompt}'")
    try:
        # Generate the image (default guidance scale; steps can be customized)
        with torch.inference_mode():  # Use inference mode for efficiency
            image = pipe(prompt=prompt, num_inference_steps=30).images[0]
        print("Image generated successfully.")
        return image
    except Exception as e:
        print(f"Error during image generation: {e}")
        # Return a placeholder image carrying the exception message
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new("RGB", (512, 512), color=(200, 200, 200))
        d = ImageDraw.Draw(img)
        try:
            fnt = ImageFont.truetype("arial.ttf", 15)
        except IOError:
            fnt = ImageFont.load_default()
        d.text((10, 10), f"Error generating image:\n{e}", fill=(255, 0, 0), font=fnt)
        return img


# Create the Gradio interface
demo = gr.Interface(
    fn=generate_image,
    inputs=gr.Textbox(
        label="Enter Text Prompt",
        placeholder="e.g., 'An astronaut riding a green horse'",
    ),
    outputs=gr.Image(label="Generated Image", type="pil"),
    title="Text-to-Image Generation with Stable Diffusion XL",
    description=(
        f"Generate images from text prompts using the {model_id} model. "
        "Loading and inference might take a moment, especially on the first run or on CPU."
    ),
    examples=["A high-tech cityscape at sunset, cinematic lighting"],
)

if __name__ == "__main__":
    # Launch the Gradio app
    demo.launch(debug=True)