---
# Docker Compose definition for the router-api service.
# NOTE(review): recent Docker Compose releases reportedly ignore the top-level
# `version` key; it is kept here for older tooling - confirm before removing.
version: '3.8'

services:
  router-api:
    # Build the image from the Dockerfile in the directory containing this file.
    build:
      context: .
      dockerfile: Dockerfile

    # Publish container port 7860 on host port 7860 (quoted so the mapping is
    # always read as a string).
    ports:
      - "7860:7860"

    # Environment passed to the container. PORT matches the published port
    # above; the remaining names look like text-generation defaults
    # (presumably read by the application - verify against its config loader).
    environment:
      - PORT=7860
      - MAX_NEW_TOKENS=600
      - DEFAULT_TEMPERATURE=0.2
      - DEFAULT_TOP_P=0.9
      # Taken from the host environment; falls back to an empty value when
      # HF_TOKEN is unset or empty.
      - HF_TOKEN=${HF_TOKEN:-}

    volumes:
      # Mount cache directory to persist model downloads
      - ~/.cache/huggingface:/root/.cache/huggingface

    # Uncomment below for GPU support (requires nvidia-docker)
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]

    # Restart the container automatically unless it was stopped explicitly.
    restart: unless-stopped