import spaces
import torch
import os
from diffusers import DiffusionPipeline

MODEL_ID = 'black-forest-labs/FLUX.1-dev'

# Set custom cache directory to avoid filling Hugging Face storage limit
CUSTOM_CACHE_DIR = './flux_cache'
os.environ['HF_HOME'] = CUSTOM_CACHE_DIR
os.environ['TRANSFORMERS_CACHE'] = CUSTOM_CACHE_DIR


# Compile the model ahead-of-time for optimal performance (CPU version)
@spaces.GPU(duration=1500)  # Note: This might not work on CPU-only, but keeping for compatibility
def compile_transformer():
    # Load model with HF token if available and custom cache dir
    token = os.getenv('HF_TOKEN')
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,  # Use float32 for CPU
        token=token,
        cache_dir=CUSTOM_CACHE_DIR
    )
    # Note: No .to('cuda') since CUDA is not available
    # Skip AoT compilation for CPU - it's not supported well
    return None


# Load the model (CPU version)
def load_model():
    # Load model with HF token if available and custom cache dir
    token = os.getenv('HF_TOKEN')
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,  # Use float32 for CPU compatibility
        token=token,
        cache_dir=CUSTOM_CACHE_DIR
    )
    # Note: No .to('cuda') - running on CPU
    # Skip AoT compilation for CPU
    # compiled_transformer = compile_transformer()
    # if compiled_transformer:
    #     spaces.aoti_apply(compiled_transformer, pipe.transformer)
    return pipe


# Note: Removed @spaces.GPU since CUDA is not available
def generate_image(pipe, prompt):
    # Generate image with optimized settings for CPU
    image = pipe(
        prompt,
        num_inference_steps=10,  # Even fewer steps for CPU speed
        guidance_scale=3.5,
        height=256,  # Smaller size for CPU
        width=256
    ).images[0]
    return image
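

# Illustrative usage sketch (an assumption, not part of the original app): ties
# load_model() and generate_image() together for a quick local smoke test.
# The prompt text and output filename below are hypothetical examples; the
# pipeline returns PIL images, so .save() writes the result to disk.
if __name__ == '__main__':
    pipe = load_model()
    demo_prompt = 'A watercolor painting of a lighthouse at dawn'  # hypothetical prompt
    image = generate_image(pipe, demo_prompt)
    image.save('output.png')  # hypothetical output path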