# LASER / test_vine.py
# moqingyan123
# final fixes
# 888f9e4
# raw
# history blame
# 2.5 kB
"""
Test script for VINE model loaded from HuggingFace Hub
"""
import os
import sys
from pathlib import Path
import torch
# Install a placeholder API key before any later import runs.
# NOTE(review): presumably a downstream import expects OPENAI_API_KEY to be
# present -- confirm; any value already exported in the shell is replaced.
os.environ.update(OPENAI_API_KEY="dummy-key")

# Put the sibling "src" directory at the front of the module search path.
src_dir = Path(__file__).parent / "src"
sys.path.insert(0, str(src_dir))

# Use the GPU whenever torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Opening banner: an 80-column rule above and below the title.
banner_rule = "=" * 80
print(banner_rule)
print("Testing VINE Model from video-fm/vine")
print(banner_rule)
# Step 1: load the VINE model from the HuggingFace Hub repo 'video-fm/vine'.
print("\n1. Loading VINE model from HuggingFace Hub...")
# transformers is imported mid-script, i.e. only after OPENAI_API_KEY and
# sys.path were set earlier in this file; keep this ordering unless it is
# confirmed to be unnecessary.
from transformers import AutoModel, AutoConfig

# Load the config first so the target device can be set before the model is
# built. trust_remote_code=True lets transformers run the custom config/model
# code published in the Hub repo.
config = AutoConfig.from_pretrained('video-fm/vine', trust_remote_code=True)
config._device = device  # Override the device setting

# Build the model from the patched config.
model = AutoModel.from_pretrained('video-fm/vine', config=config, trust_remote_code=True)
# The check mark was mojibake ("βœ“") in the source; restored to U+2713.
print("✓ Model loaded successfully")
# Step 2: check that every checkpoint/config file the pipeline will be given
# is present in the "checkpoints" directory next to this script.
print("\n2. Verifying checkpoint files...")
checkpoint_dir = Path(__file__).parent / "checkpoints"

# Human-readable label -> expected file location.
checkpoints = {
    "SAM2 config": checkpoint_dir / "sam2_hiera_t.yaml",
    "SAM2 checkpoint": checkpoint_dir / "sam2_hiera_tiny.pt",
    "GroundingDINO config": checkpoint_dir / "GroundingDINO_SwinT_OGC.py",
    "GroundingDINO checkpoint": checkpoint_dir / "groundingdino_swint_ogc.pth",
}

# all_exist stays True only if every file above is found; one status line is
# printed per file either way.
all_exist = True
for name, path in checkpoints.items():
    if path.exists():
        # Report the on-disk size in MiB (bytes / 1024**2).
        size_mb = path.stat().st_size / (1024 * 1024)
        print(f"✓ {name}: {path.name} ({size_mb:.1f} MB)")
    else:
        print(f"✗ {name}: NOT FOUND at {path}")
        all_exist = False
# Step 3: create the VINE pipeline.
# The checkpoint scan earlier in this script records its result in all_exist.
# Honour it here: without this guard the pipeline would be built from paths
# already reported as missing and the script would still announce success.
if not all_exist:
    # A string argument makes sys.exit print it to stderr and exit with 1.
    sys.exit(
        "Cannot create VINE pipeline: one or more checkpoint files are "
        "missing (see the report above)."
    )

print("\n3. Creating VINE pipeline...")
# vine_hf is imported here, after sys.path was extended earlier in this file.
from vine_hf import VinePipeline

# Hand the loaded model plus the SAM2 / GroundingDINO config and checkpoint
# paths (as plain strings) to the pipeline, on the same device as the model.
pipeline = VinePipeline(
    model=model,
    tokenizer=None,
    sam_config_path=str(checkpoints["SAM2 config"]),
    sam_checkpoint_path=str(checkpoints["SAM2 checkpoint"]),
    gd_config_path=str(checkpoints["GroundingDINO config"]),
    gd_checkpoint_path=str(checkpoints["GroundingDINO checkpoint"]),
    device=device,
    trust_remote_code=True,
)
# The check mark was mojibake ("βœ“") in the source; restored to U+2713.
print("✓ Pipeline created successfully")
# Closing banner: reached only when every earlier step ran without raising.
print("\n" + "=" * 80)
# The emoji was mojibake ("βœ…") in the source; restored to U+2705.
print("✅ VINE Setup Complete and Working!")
print("=" * 80)

# Copy-pasteable snippet showing how to use the model outside this script.
# It is printed verbatim, including its leading and trailing blank lines.
usage_example = """
from transformers import AutoModel
from vine_hf import VinePipeline
model = AutoModel.from_pretrained('video-fm/vine', trust_remote_code=True)
pipeline = VinePipeline(model=model, ...)
results = pipeline('video.mp4', categorical_keywords=['person', 'dog'], ...)
"""
print("\nYou can now use the model for video understanding:")
print(usage_example)