SNIPED_grasp-any-region / requirements.txt
jbilcke-hf's picture
Upload core files for paper 2510.18876
46861c5 verified
# Gradio (REQUIRED - specific version)
# Exact pin: keep this in step with whatever Gradio version the Space is configured for.
gradio==5.49.1
# ZeroGPU support
# Exact pin as well; upgrade it together with gradio rather than independently.
spaces==0.30.4
# PyTorch (pinned for FlashAttention compatibility)
# torchvision and torchaudio are pinned to the releases built against torch 2.8.0.
# Left unpinned, pip first tries their newest versions (which require a newer torch)
# and has to backtrack; pinning keeps installs fast and reproducible.
torch==2.8.0
torchvision==0.23.0
torchaudio==2.8.0
# FlashAttention (prebuilt wheel for PyTorch 2.8, Python 3.10, CUDA 12)
# The wheel filename encodes hard constraints: cu12 + torch2.8 + cxx11abiFALSE,
# CPython 3.10 (cp310) and linux_x86_64 only. Changing the torch pin or the Python
# version requires swapping this URL for the matching wheel.
flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
# Transformers and ML libraries
# transformers, timm and peft are exact pins; accelerate only has a lower bound,
# so its resolved version can change between builds.
transformers==4.56.2
accelerate>=0.28.0
timm==1.0.19
peft==0.15.2
# Vision and image processing
# NOTE(review): opencv-python is unpinned while numpy is pinned to 1.26.4 elsewhere in
# this file; recent opencv-python releases appear to require numpy>=2, so pip may
# have to backtrack to an older opencv build - confirm and consider an upper bound.
opencv-python
pillow>=9.4.0
pycocotools
# The two git+ requirements below carry no @<commit>/@<tag>, so every build installs
# whatever the repository's default branch points to at that moment.
# NOTE(review): consider pinning a commit for reproducible builds.
git+https://github.com/cocodataset/panopticapi.git
git+https://github.com/facebookresearch/segment-anything.git
# Data handling
# numpy is held at an exact 1.x version; pydantic only has a floor (2.10.1).
numpy==1.26.4
pydantic>=2.10.1
# Upper bound only: anything up to and including 3.20.3 is accepted.
# NOTE(review): presumably capped to stay on the pre-4.x protobuf API - confirm which
# dependency needs this before relaxing it.
protobuf<=3.20.3
sentencepiece
# Utilities
# httpx is constrained to the 0.24.1 <= version < 1.0 range; the rest are unpinned.
httpx>=0.24.1,<1.0
requests
inflect
distinctipy
# NOTE(review): attrdict's last release is reported to import `Mapping` from
# `collections`, which Python 3.10+ no longer provides - confirm it imports on the
# Python 3.10 runtime this file targets (attrdict3 is a maintained fork).
attrdict
# Video processing (labelled optional, but the line below is active, so it is
# always installed; comment it out if video support is not needed)
pytorchvideo
# vLLM (optional - for evaluation; left commented out so it is not installed)
# vllm==0.7.3
# XTuner (for training - not needed for inference; left commented out)
# xtuner==0.2.0rc0