# Scraped from a hosted file view (commit 888f9e4, file size 13,168 bytes); page chrome and line-number gutter removed.
"""
Example demonstrating how to load and use VINE ensemble weights
This script shows the correct way to load your pretrained VINE ensemble weights
and use them with the HuggingFace interface, based on the actual inference.py workflow.
"""
import os
import sys
import torch
import numpy as np
from transformers.pipelines import PIPELINE_REGISTRY
#os.environ["OPENAI_API_KEY"]="dummy-key" # Set your OpenAI API key here or via environment variable
# Add the parent directory to the path to import vine_hf
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from vine_hf import VineConfig, VineModel, VinePipeline
from laser.loading import load_video
def example_load_ensemble_weights():
    """Create a VineModel from a local directory of ensemble ``.model`` files.

    The checkpoint directory is resolved relative to this file; edit
    ``model_dir`` below if your weights live elsewhere. Progress and any
    problem found are printed to stdout.

    Returns:
        The ``VineModel`` built from a ``VineConfig`` pointing at the
        directory, or ``None`` if the directory is missing or holds no
        ``.model`` files.
    """
    print("=== Loading Ensemble VINE Weights ===")

    # Path to your ensemble model (adjust this to your actual path)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")
    print(f"Looking for ensemble weights in: {model_dir}")

    # isdir rather than exists: a stray regular file at this path would
    # otherwise make os.listdir() raise NotADirectoryError.
    if not os.path.isdir(model_dir):
        print(f"✗ Model directory not found: {model_dir}")
        print("Please adjust the path to point to your ensemble weights")
        return None
    print("✓ Model directory found")

    # List available model files (sorted so the output is deterministic;
    # os.listdir() returns entries in arbitrary order).
    model_files = sorted(f for f in os.listdir(model_dir) if f.endswith(".model"))
    print(f"Available model files: {model_files}")
    if not model_files:
        print("✗ No .model files found in directory")
        return None

    # Create configuration with ensemble path (local directory with .model files)
    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        use_hf_repo=False,
        local_dir=model_dir,
        local_filename=None,
    )

    print("Creating VINE model with ensemble weights...")
    vine_model = VineModel(config)
    print("✓ VINE model created with ensemble weights!")
    return vine_model
def example_direct_ensemble_loading():
    """Load ensemble weights through ``VineModel.from_pretrained_vine``.

    Looks for the ensemble checkpoint directory relative to this file and
    asks ``from_pretrained_vine`` for epoch 0. Outcomes are printed to stdout.

    Returns:
        The loaded model, or ``None`` when the directory is missing or the
        loader raises.
    """
    print("\n=== Direct Ensemble Loading ===")

    # Path to the ensemble checkpoint directory
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    if not os.path.exists(model_dir):
        print(f"✗ Model directory not found: {model_dir}")
        return None

    try:
        # Use the class method for direct loading
        vine_model = VineModel.from_pretrained_vine(
            model_path=model_dir,
            epoch=0,  # Load epoch 0
        )
    except Exception as e:
        # Deliberately broad: this is a demo, so any loader failure is
        # reported and the remaining examples keep running.
        print(f"✗ Error loading with from_pretrained_vine: {e}")
        return None

    print("✓ Model loaded using from_pretrained_vine!")
    return vine_model
def _load_original_model(model_dir, model_name, epoch, device):
    """Load ``<model_name>.<epoch>.model`` the way the original inference.py does.

    Returns:
        The object stored in the checkpoint, or ``None`` (after printing a
        notice) when the checkpoint file does not exist.
    """
    model_path = os.path.join(model_dir, f"{model_name}.{epoch}.model")
    if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}")
        return None
    # SECURITY: weights_only=False unpickles arbitrary Python objects and can
    # execute code embedded in the file. Only load checkpoints you trust.
    return torch.load(model_path, map_location=device, weights_only=False)


def example_compare_original_vs_hf():
    """Compare the original inference.py approach with HuggingFace interface.

    Prints both loading recipes as code snippets, then actually tries each
    one against the local ensemble directory and reports which sub-models
    (``clip_cate_model``, ``clip_unary_model``, ``clip_binary_model``) the
    resulting objects expose. Success is only claimed when both loads
    produced a model. Returns ``None``; all results go to stdout.
    """
    print("\n=== Comparing Original vs HuggingFace Interface ===")

    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")
    model_name = "ensemble-2025-02-10-14-57-22"
    epoch = 0

    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        return

    print("Original approach (from inference.py):")
    print("```python")
    print("def load_model(model_dir, model_name, epoch, device):")
    print(" model_name = model_name + f'.{epoch}.model'")
    print(" predicate_model = torch.load(os.path.join(model_dir, model_name), map_location=device, weights_only=False)")
    print(" return predicate_model")
    print("")
    print("predicate_model = load_model(model_dir, model_name, epoch, device)")
    print("```")

    # The snippet mirrors the VineConfig arguments this file actually uses
    # when it builds a model from a local directory.
    print("\nNew HuggingFace approach:")
    print("```python")
    print("config = VineConfig(use_hf_repo=False, local_dir=model_dir, local_filename=None)")
    print("vine_model = VineModel(config)")
    print("# or")
    print("vine_model = VineModel.from_pretrained_vine(model_dir, epoch=0)")
    print("```")

    submodel_names = ("clip_cate_model", "clip_unary_model", "clip_binary_model")

    # Try to load with both approaches if possible
    try:
        # Original approach
        device = "cuda" if torch.cuda.is_available() else "cpu"
        original_model = _load_original_model(model_dir, model_name, epoch, device)
        # Compare against None explicitly: truthiness of a model object can
        # depend on __len__/__bool__ and is not a reliable "was it loaded".
        if original_model is not None:
            print(f"✓ Original model loaded: {type(original_model)}")
            for attr in submodel_names:
                print(f" Has {attr}: {hasattr(original_model, attr)}")

        # HuggingFace approach
        vine_model = VineModel.from_pretrained_vine(model_dir, epoch=epoch)
        if vine_model is not None:
            print(f"✓ HuggingFace model loaded: {type(vine_model)}")
            for attr in submodel_names:
                print(f" Has {attr}: {hasattr(vine_model, attr)}")

        # Only claim success when both loads really produced a model.
        if original_model is not None and vine_model is not None:
            print("\n✓ Both approaches work! HuggingFace interface successfully loads ensemble weights.")
        else:
            print("\n✗ Comparison incomplete: at least one approach did not load a model.")
    except Exception as e:
        # Deliberately broad: a demo should report the failure and move on.
        print(f"Error in comparison: {e}")
def example_ensemble_with_pipeline():
    """Example using ensemble weights with the pipeline.

    Registers ``VinePipeline`` under the task name
    ``"vine-video-understanding"``, builds a ``VineModel`` from the local
    ensemble directory, and wraps it in a ``VinePipeline``. The SAM2 and
    GroundingDINO paths below are placeholders and must be replaced with real
    locations before the pipeline can be run. If the demo video exists, a
    usage snippet is printed; the pipeline itself is not executed.

    Returns:
        The constructed ``VinePipeline``, or ``None`` when the ensemble
        directory does not exist.
    """
    print("\n=== Using Ensemble Weights with Pipeline ===")

    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        return

    # Register pipeline
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )

    # Create model with ensemble weights (local directory)
    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        use_hf_repo=False,
        local_dir=model_dir,
        local_filename=None,
    )
    vine_model = VineModel(config)

    # Create pipeline with segmentation model paths
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        # SAM2 configuration
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        # GroundingDINO configuration
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    print("✓ Pipeline created with ensemble VINE weights")

    # Check for demo video
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")

        # Use the same keywords as in the original inference.py
        categorical_keywords = ['human', 'dog', 'frisbee']
        unary_keywords = ['running', 'jumping', 'catching', 'throwing']
        binary_keywords = ['behind', 'bite', 'front', 'jump over', 'right', 'left']

        print("Example pipeline usage:")
        print("```python")
        print("results = vine_pipeline(")
        print(f" '{demo_video}',")
        print(f" categorical_keywords={categorical_keywords},")
        print(f" unary_keywords={unary_keywords},")
        print(f" binary_keywords={binary_keywords},")
        print(" segmentation_method='grounding_dino_sam2'")
        print(")")
        print("```")

        # Uncomment to actually run (requires segmentation models)
        # try:
        #     results = vine_pipeline(
        #         demo_video,
        #         categorical_keywords=categorical_keywords,
        #         unary_keywords=unary_keywords,
        #         binary_keywords=binary_keywords,
        #         segmentation_method='grounding_dino_sam2'
        #     )
        #     print("Results:", results['summary'])
        # except Exception as e:
        #     print(f"Pipeline execution failed: {e}")
        #     print("This is expected if segmentation models are not set up")

    return vine_pipeline
def demonstrate_weight_transfer():
    """Demonstrate how weights are transferred from ensemble to HuggingFace format.

    Purely informational: prints the components of the ensemble model and
    the steps used to move its weights into the HuggingFace-style model.
    Loads nothing and returns ``None``.
    """
    lines = (
        "\n=== Weight Transfer Demonstration ===",
        "The ensemble model structure (PredicateModel):",
        "- clip_cate_model: CLIP model for categorical classification",
        "- clip_unary_model: CLIP model for unary predicates",
        "- clip_binary_model: CLIP model for binary relations",
        "- clip_tokenizer: Tokenizer for text processing",
        "- clip_processor: Processor for image processing",
        "\nWeight transfer process:",
        "1. Load ensemble model with torch.load()",
        "2. Initialize base CLIP models in HuggingFace format",
        "3. Transfer state_dict from ensemble to HuggingFace models:",
        " - ensemble.clip_cate_model → hf.clip_cate_model",
        " - ensemble.clip_unary_model → hf.clip_unary_model",
        " - ensemble.clip_binary_model → hf.clip_binary_model",
        "4. Transfer tokenizer and processor",
        "\nThis preserves all your fine-tuned weights while making them HuggingFace compatible!",
    )
    # One print per entry keeps the output identical to line-by-line printing.
    for line in lines:
        print(line)
def troubleshooting_guide():
    """Provide troubleshooting guide for common issues.

    Purely informational: prints a list of common error messages with
    suggested checks, followed by manual debugging steps. Returns ``None``.
    """
    lines = (
        "\n=== Troubleshooting Guide ===",
        "Common Issues:",
        "1. 'No model file found for epoch X'",
        " → Check that .model files exist in the directory",
        " → Verify the epoch number is correct",
        " → List files: ls /path/to/model/dir/*.model",
        "\n2. 'Error loading VINE weights'",
        " → Check file permissions",
        " → Verify the model file is not corrupted",
        " → Try loading with torch.load() directly first",
        "\n3. 'CLIP model mismatch'",
        " → Ensure config.model_name matches the base model used in training",
        "\n4. 'Device mismatch errors'",
        " → Models are loaded to CPU first, then moved to device",
        " → Check CUDA availability with torch.cuda.is_available()",
        "\nDebugging steps:",
        "1. Test loading ensemble model directly:",
        # weights_only=False matches how this file itself loads the ensemble;
        # without it, recent PyTorch refuses to unpickle a full model object.
        " model = torch.load('path/to/model.0.model', map_location='cpu', weights_only=False)",
        "2. Check model attributes:",
        " print(dir(model))",
        "3. Verify state_dict keys:",
        " print(model.clip_cate_model.state_dict().keys())",
    )
    for line in lines:
        print(line)
def main():
    """Run every example in order, then print the explanatory sections.

    Each example that touches weights runs inside its own try/except so that
    one failure (for instance, missing checkpoints) is reported without
    stopping the remaining examples.
    """
    print("VINE Ensemble Weights Loading Examples")
    print("=" * 50)

    # (label used in the failure message, example to run)
    examples = (
        ("Ensemble loading", example_load_ensemble_weights),   # Test ensemble weight loading
        ("Direct loading", example_direct_ensemble_loading),
        ("Comparison", example_compare_original_vs_hf),        # Compare approaches
        ("Pipeline", example_ensemble_with_pipeline),          # Test pipeline with ensemble weights
    )
    for label, run_example in examples:
        try:
            run_example()
        except Exception as e:
            # Top-level boundary of a demo script: report and continue.
            print(f"{label} example failed: {e}")

    # Educational content
    demonstrate_weight_transfer()
    troubleshooting_guide()

    print("\n" + "=" * 50)
    print("Key Points:")
    # The ensemble checkpoints handled by this script are ".model" files.
    print("1. AutoModel.from_pretrained() won't work with .model ensemble weights")
    print("2. Use torch.load() to load the ensemble, then transfer weights")
    print("3. The HuggingFace interface preserves your fine-tuned weights")
    # Matches the VineConfig arguments the examples in this file actually pass.
    print("4. Set use_hf_repo=False and local_dir in VineConfig to auto-load weights")
    print("5. Use VineModel.from_pretrained_vine() for direct loading")


if __name__ == "__main__":
    main()