"""
Example usage of VINE HuggingFace interface with pretrained VINE weights
This script demonstrates how to use the VINE model with your pretrained weights
from the ensemble format or from video-fm/vine_v0.
"""
import os
import sys

import torch
from transformers import pipeline
from transformers.pipelines import PIPELINE_REGISTRY

# Set your OpenAI API key here or via an environment variable:
# os.environ['OPENAI_API_KEY'] = "dummy-key"

# Add the parent directory to the path so that vine_hf can be imported
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from vine_hf import VineConfig, VineModel, VinePipeline
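
# Note: the pipeline examples below assume SAM2 and GroundingDINO configs and
# checkpoints are available locally; replace the "path/to/..." placeholders
# with real paths before running anything end-to-end.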


def example_with_local_pretrained_weights():
    """Example using a local pretrained VINE checkpoint."""
    print("=== Using Local Pretrained VINE Weights ===")

    # Download https://huggingface.co/video-fm/vine_v0/tree/main/laser_model_v1.pt
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path

    # Create configuration pointing at your pretrained weights (local file)
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        target_fps=1,
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )

    # Method 1: Initialize the model directly
    print("Method 1: Direct model initialization")
    vine_model = VineModel(config)
    print(f"✓ Model initialized with pretrained weights from: {pretrained_vine_file}")

    # Method 2: Use the from_pretrained_vine class method
    print("\nMethod 2: Using from_pretrained_vine class method")
    vine_model_2 = VineModel.from_pretrained_vine(
        model_path=pretrained_vine_file,
        config=config,
        epoch=0,  # Specify the checkpoint epoch number
    )
    print("✓ Model loaded using from_pretrained_vine method")

    return vine_model


def example_with_huggingface_hub():
    """Example using VINE weights from the HuggingFace Hub."""
    print("\n=== Using HuggingFace Hub Weights ===")

    # Create configuration that loads weights from the HuggingFace Hub
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        use_hf_repo=True,
        model_repo="video-fm/vine_v0",  # Your HF Hub model
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
    )

    try:
        # Initialize the model (will try to load from the HF Hub)
        vine_model = VineModel(config)
        print("✓ Model loaded from HuggingFace Hub: video-fm/vine_v0")
        return vine_model
    except Exception as e:
        print(f"✗ Could not load from HuggingFace Hub: {e}")
        print("Make sure your model is pushed to video-fm/vine_v0")
        return None
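

# Optional helper (a sketch, not part of the original example): pre-fetch the
# checkpoint from the Hub with huggingface_hub. This assumes the file is stored
# as laser_model_v1.pt in video-fm/vine_v0, matching the download link above.
def download_vine_checkpoint():
    """Download the VINE checkpoint from the Hub and return its local path."""
    from huggingface_hub import hf_hub_download

    return hf_hub_download(repo_id="video-fm/vine_v0", filename="laser_model_v1.pt")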


def example_pipeline_with_pretrained():
    """Example using the pipeline with pretrained VINE weights."""
    print("\n=== Pipeline with Pretrained VINE ===")

    # Register the custom pipeline with transformers
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )

    # Create configuration with your weights
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )

    # Create the model with pretrained weights
    vine_model = VineModel(config)

    # Create the pipeline with segmentation model paths
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
        device=0,
    )
    print("✓ Pipeline created with pretrained VINE weights")

    # Example usage (requires an actual video file)
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")
        print("Example pipeline call:")
        print("results = vine_pipeline(")
        print(f"    '{demo_video}',")
        print("    categorical_keywords=['human', 'dog', 'frisbee'],")
        print("    unary_keywords=['running', 'jumping', 'sitting'],")
        print("    binary_keywords=['behind', 'chasing', 'next to'],")
        print("    debug_visualizations=True,")
        print(")")

        # Uncomment to actually run (requires the segmentation models):
        # results = vine_pipeline(
        #     demo_video,
        #     categorical_keywords=['human', 'dog', 'frisbee'],
        #     unary_keywords=['running', 'jumping', 'sitting'],
        #     binary_keywords=['behind', 'chasing', 'next to'],
        #     debug_visualizations=True,
        # )
        # print("Results:", results['summary'])

    return vine_pipeline
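

# Alternative construction (an untested sketch): once the task is registered via
# PIPELINE_REGISTRY above, the standard transformers factory should also work,
# with the extra keyword arguments forwarded to VinePipeline. `vine_model` here
# would be a model built as in example_pipeline_with_pretrained():
#
# from transformers import pipeline
# vine_pipeline = pipeline(
#     "vine-video-understanding",
#     model=vine_model,
#     sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
#     sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
#     gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
#     gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
#     device=0,
# )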


def example_manual_weight_loading():
    """Example of manually loading weights after model creation."""
    print("\n=== Manual Weight Loading ===")

    # Create a model with base CLIP weights
    # (no pretrained path: base config with no HF repo or local file configured)
    config = VineConfig()
    vine_model = VineModel(config)
    print("✓ Model created with base CLIP weights")

    model_dir = "/path/to/your/local/ensemble/model_dir.pt"  # Replace with your model directory
    if os.path.exists(model_dir):
        success = vine_model.load_pretrained_vine_weights(model_dir, epoch=0)
        if success:
            print("✓ Successfully loaded pretrained VINE weights manually")
        else:
            print("✗ Failed to load pretrained weights")
    else:
        print(f"✗ Model directory not found: {model_dir}")

    return vine_model
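

# Optional (a hedged sketch): if VineModel inherits from transformers.PreTrainedModel,
# a manually loaded model can be re-saved in the standard HF format, which is one way
# to produce the Hub layout expected by example_with_huggingface_hub():
#
# vine_model = example_manual_weight_loading()
# vine_model.save_pretrained("vine_hf_checkpoint")
# vine_model.push_to_hub("video-fm/vine_v0")  # requires `huggingface-cli login`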


def compare_model_outputs():
    """Compare outputs between base CLIP and pretrained VINE."""
    print("\n=== Comparing Model Outputs ===")

    # Create dummy data for testing
    video_frames = torch.randn(3, 224, 224, 3) * 255  # 3 frames
    video_frames = video_frames.clamp(0, 255).byte()

    # Masks and boxes are keyed by frame index, then by object track id.
    # Box format is assumed to be [x1, y1, x2, y2].
    masks = {
        0: {1: torch.ones(224, 224, 1)},
        1: {1: torch.ones(224, 224, 1)},
        2: {1: torch.ones(224, 224, 1)},
    }
    bboxes = {
        0: {1: [50, 50, 150, 150]},
        1: {1: [52, 52, 152, 152]},
        2: {1: [54, 54, 154, 154]},
    }
    keywords = ['human', 'dog', 'frisbee']

    # Model 1: Base CLIP
    print("Creating model with base CLIP weights...")
    config_base = VineConfig()
    model_base = VineModel(config_base)

    # Model 2: Pretrained VINE (if available)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    if os.path.exists(model_dir):
        print("Creating model with pretrained VINE weights...")
        config_vine = VineConfig(
            use_hf_repo=False,
            local_dir=model_dir,
            local_filename=None,
        )
        model_vine = VineModel(config_vine)

        print("\nComparing predictions...")
        # Get predictions from both models
        with torch.no_grad():
            results_base = model_base.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3,
            )
            results_vine = model_vine.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3,
            )

        print("Base CLIP confidence scores:", results_base['confidence_scores'])
        print("Pretrained VINE confidence scores:", results_vine['confidence_scores'])
        print("✓ Successfully compared both models")
    else:
        print(f"Pretrained model not found at: {model_dir}")
        print("Skipping comparison")


if __name__ == "__main__":
    print("VINE HuggingFace Interface - Pretrained Weights Examples")
    print("=" * 60)

    try:
        # Test local pretrained weights
        model1 = example_with_local_pretrained_weights()
    except Exception as e:
        print(f"Local weights example failed: {e}")

    try:
        # Test HuggingFace Hub weights
        model2 = example_with_huggingface_hub()
    except Exception as e:
        print(f"HuggingFace Hub example failed: {e}")

    try:
        # Test the pipeline with pretrained weights
        vine_pipeline = example_pipeline_with_pretrained()
    except Exception as e:
        print(f"Pipeline example failed: {e}")

    # Uncomment to test manual weight loading:
    # try:
    #     model3 = example_manual_weight_loading()
    # except Exception as e:
    #     print(f"Manual loading example failed: {e}")

    # Uncomment to compare model outputs:
    # try:
    #     compare_model_outputs()
    # except Exception as e:
    #     print(f"Comparison example failed: {e}")

    print("\n" + "=" * 60)
    print("Examples completed!")

    print("\nUsage Summary:")
    print("1. Configure VineConfig with `use_hf_repo=True` + `model_repo` for Hub models,")
    print("   or `use_hf_repo=False` + `local_dir`/`local_filename` for local weights")
    print("2. Use VineModel.from_pretrained_vine() for direct loading")
    print("3. Register VinePipeline with PIPELINE_REGISTRY for end-to-end video inference")