# File: LASER/src/vine_hf/example_with_pretrained_vine.py
# Last commit: "final fixes" (888f9e4) by moqingyan123 -- 10.1 kB
"""
Example usage of VINE HuggingFace interface with pretrained VINE weights
This script demonstrates how to use the VINE model with your pretrained weights
from the ensemble format or from video-fm/vine_v0.
"""
import os
import sys
import torch
from transformers import pipeline
from transformers.pipelines import PIPELINE_REGISTRY
# Set your OpenAI API key here or via environment variable
#os.environ['OPENAI_API_KEY'] = "dummy-key"
# Add the parent directory to the path to import vine_hf
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from vine_hf import VineConfig, VineModel, VinePipeline
def example_with_local_pretrained_weights():
    """Show two ways of building a VineModel from a local checkpoint file.

    A ``VineConfig`` is created with ``use_hf_repo=False`` and with
    ``local_dir`` / ``local_filename`` split out of ``pretrained_vine_file``.
    The config is then used to (1) construct ``VineModel(config)`` directly
    and (2) call ``VineModel.from_pretrained_vine`` with the same file.

    The checkpoint path and the visualization directory are placeholders
    and must be replaced with real paths before running.

    Returns:
        The model built by Method 1 (direct initialization).
    """
    print("=== Using Local Pretrained VINE Weights ===")

    # Download https://huggingface.co/video-fm/vine_v0/tree/main/laser_model_v1.pt
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path

    # Create configuration with your pretrained path (local file)
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        target_fps=1,
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )

    # Method 1: Initialize model directly
    print("Method 1: Direct model initialization")
    vine_model = VineModel(config)
    print(f"✓ Model initialized with pretrained weights from: {pretrained_vine_file}")

    # Method 2: Use the from_pretrained_vine class method.
    # The result is bound only to illustrate the call; it is not returned.
    print("\nMethod 2: Using from_pretrained_vine class method")
    vine_model_2 = VineModel.from_pretrained_vine(
        model_path=pretrained_vine_file,
        config=config,
        epoch=0,  # Specify epoch number
    )
    print("✓ Model loaded using from_pretrained_vine method")

    return vine_model
def example_with_huggingface_hub():
    """Example using VINE weights from HuggingFace Hub.

    Builds a ``VineConfig`` with ``use_hf_repo=True`` and
    ``model_repo="video-fm/vine_v0"`` and tries to construct a ``VineModel``
    from it. This is a best-effort demo: any exception raised during
    construction is reported on stdout instead of being propagated.

    Returns:
        The constructed model, or ``None`` if construction raised.
    """
    print("\n=== Using HuggingFace Hub Weights ===")

    # Create configuration to use HuggingFace Hub weights
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        use_hf_repo=True,
        model_repo="video-fm/vine_v0",  # Your HF Hub model
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
    )

    try:
        # Initialize model (will try to load from HF Hub)
        vine_model = VineModel(config)
    except Exception as e:
        # Deliberately broad: the example should keep going and explain
        # the failure rather than abort the whole script.
        print(f"✗ Could not load from HuggingFace Hub: {e}")
        print("Make sure your model is pushed to video-fm/vine_v0")
        return None

    print("✓ Model loaded from HuggingFace Hub: video-fm/vine_v0")
    return vine_model
def example_pipeline_with_pretrained():
    """Example using pipeline with pretrained VINE weights.

    Steps performed:
      1. Register ``VinePipeline`` under the task name
         ``"vine-video-understanding"`` in ``PIPELINE_REGISTRY``.
      2. Build a ``VineConfig`` pointing at a local checkpoint file and
         construct a ``VineModel`` from it.
      3. Instantiate ``VinePipeline`` with that model and the SAM2 /
         GroundingDINO config and checkpoint paths.
      4. If the demo video exists next to this script, print a sample
         pipeline call (the call itself is left commented out).

    All filesystem paths in this function are placeholders and must be
    replaced with real paths before running.

    Returns:
        The constructed ``VinePipeline`` instance.
    """
    print("\n=== Pipeline with Pretrained VINE ===")

    # Register the pipeline
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )

    # Create configuration with your weights
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )

    # Create model with pretrained weights
    vine_model = VineModel(config)

    # Create pipeline with segmentation model paths.
    # NOTE(review): device=0 presumably selects the first CUDA device --
    # confirm how VinePipeline interprets it on CPU-only machines.
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
        device=0,
    )
    print("✓ Pipeline created with pretrained VINE weights")

    # Example usage (would require actual video file)
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")
        print("Example pipeline call:")
        # The printed snippet must itself be valid Python, so every
        # argument line except the last ends with a comma.
        print("results = vine_pipeline(")
        print(f" '{demo_video}',")
        print(" categorical_keywords=['human', 'dog', 'frisbee'],")
        print(" unary_keywords=['running', 'jumping', 'sitting'],")
        print(" binary_keywords=['behind', 'chasing', 'next to'],")
        print(" debug_visualizations=True")
        print(")")

        # Uncomment to actually run (requires segmentation models)
        # results = vine_pipeline(
        #     demo_video,
        #     categorical_keywords=['human', 'dog', 'frisbee'],
        #     unary_keywords=['running', 'jumping', 'sitting'],
        #     binary_keywords=['behind', 'chasing', 'next to'],
        #     debug_visualizations=True,
        # )
        # print("Results:", results['summary'])

    return vine_pipeline
def example_manual_weight_loading():
    """Example of manually loading weights after model creation.

    A ``VineModel`` is first built from a default ``VineConfig()``. If the
    placeholder path in ``model_dir`` exists on disk,
    ``load_pretrained_vine_weights(model_dir, epoch=0)`` is called and its
    truthiness is reported as success or failure; otherwise a "not found"
    message is printed.

    Returns:
        The model, whether or not the pretrained weights were loaded.
    """
    print("\n=== Manual Weight Loading ===")

    # Create model with base CLIP weights
    # No pretrained path: create base config (no HF repo or local file configured)
    config = VineConfig()
    vine_model = VineModel(config)
    print("✓ Model created with base CLIP weights")

    model_dir = "/path/to/your/local/ensemble/model_dir.pt"  # Replace with your model directory

    # Guard clause: nothing to load when the path is missing.
    if not os.path.exists(model_dir):
        print(f"✗ Model directory not found: {model_dir}")
        return vine_model

    success = vine_model.load_pretrained_vine_weights(model_dir, epoch=0)
    if success:
        print("✓ Successfully loaded pretrained VINE weights manually")
    else:
        print("✗ Failed to load pretrained weights")

    return vine_model
def compare_model_outputs():
    """Compare outputs between base CLIP and pretrained VINE.

    Random uint8 frames plus all-ones masks and fixed bounding boxes are
    generated as dummy input. A base model is built from ``VineConfig()``.
    If the ensemble directory under ``../../data`` (relative to this file)
    exists, a second model is built from it, both models run ``predict``
    on the same input under ``torch.no_grad()``, and each result's
    ``'confidence_scores'`` entry is printed. If the directory does not
    exist the comparison is skipped with a message.

    Returns:
        None. Results are only printed.
    """
    print("\n=== Comparing Model Outputs ===")

    # Create dummy data for testing
    video_frames = torch.randn(3, 224, 224, 3) * 255  # 3 frames
    video_frames = video_frames.clamp(0, 255).byte()

    # Outer keys 0..2 are presumably frame indices and the inner key 1 an
    # object id -- TODO confirm against VineModel.predict.
    masks = {
        0: {1: torch.ones(224, 224, 1)},
        1: {1: torch.ones(224, 224, 1)},
        2: {1: torch.ones(224, 224, 1)},
    }
    bboxes = {
        0: {1: [50, 50, 150, 150]},
        1: {1: [52, 52, 152, 152]},
        2: {1: [54, 54, 154, 154]},
    }
    keywords = ['human', 'dog', 'frisbee']

    # Model 1: Base CLIP
    print("Creating model with base CLIP weights...")
    config_base = VineConfig()
    model_base = VineModel(config_base)

    # Model 2: Pretrained VINE (if available)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    # Guard clause: without the pretrained weights there is nothing to compare.
    if not os.path.exists(model_dir):
        print(f"Pretrained model not found at: {model_dir}")
        print("Skipping comparison")
        return

    print("Creating model with pretrained VINE weights...")
    config_vine = VineConfig(
        use_hf_repo=False,
        local_dir=model_dir,
        local_filename=None,
    )
    model_vine = VineModel(config_vine)

    print("\nComparing predictions...")

    # Get predictions from both models; both receive identical arguments.
    predict_kwargs = dict(
        video_frames=video_frames,
        masks=masks,
        bboxes=bboxes,
        categorical_keywords=keywords,
        return_top_k=3,
    )
    with torch.no_grad():
        results_base = model_base.predict(**predict_kwargs)
        results_vine = model_vine.predict(**predict_kwargs)

    print("Base CLIP confidence scores:", results_base['confidence_scores'])
    print("Pretrained VINE confidence scores:", results_vine['confidence_scores'])
    print("✓ Successfully compared both models")
if __name__ == "__main__":
    # Script entry point: run each enabled example in turn. Every example is
    # wrapped in its own try/except so that one failure (e.g. placeholder
    # paths that were not replaced) does not stop the remaining examples.
    print("VINE HuggingFace Interface - Pretrained Weights Examples")
    print("=" * 60)

    try:
        # Test local pretrained weights
        model1 = example_with_local_pretrained_weights()
    except Exception as e:
        print(f"Local weights example failed: {e}")

    try:
        # Test HuggingFace Hub weights
        model2 = example_with_huggingface_hub()
    except Exception as e:
        print(f"HuggingFace Hub example failed: {e}")

    try:
        # Test pipeline with pretrained weights.
        # Bound to `vine_pipeline` rather than `pipeline` so the `pipeline`
        # function imported from transformers is not overwritten.
        vine_pipeline = example_pipeline_with_pretrained()
    except Exception as e:
        print(f"Pipeline example failed: {e}")

    # The two examples below are disabled; uncomment to run them.
    # try:
    #     # Test manual weight loading
    #     model3 = example_manual_weight_loading()
    # except Exception as e:
    #     print(f"Manual loading example failed: {e}")

    # try:
    #     # Compare model outputs
    #     compare_model_outputs()
    # except Exception as e:
    #     print(f"Comparison example failed: {e}")

    print("\n" + "=" * 60)
    print("Examples completed!")
    print("\nUsage Summary:")
    print("1. Configure VineConfig with `use_hf_repo` + `model_repo` for Hub models, or `use_hf_repo=False` + `local_dir`/`local_filename` for local weights")
    print("2. Use VineModel.from_pretrained_vine() for direct loading")