| """ | |
| Example usage of VINE HuggingFace interface with pretrained VINE weights | |
| This script demonstrates how to use the VINE model with your pretrained weights | |
| from the ensemble format or from video-fm/vine_v0. | |
| """ | |
| import os | |
| import sys | |
| import torch | |
| from transformers import pipeline | |
| from transformers.pipelines import PIPELINE_REGISTRY | |
| # Set your OpenAI API key here or via environment variable | |
| #os.environ['OPENAI_API_KEY'] = "dummy-key" | |
| # Add the parent directory to the path to import vine_hf | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from vine_hf import VineConfig, VineModel, VinePipeline | |


def example_with_local_pretrained_weights():
    """Example using local pretrained VINE weights."""
    print("=== Using Local Pretrained VINE Weights ===")

    # Download from: https://huggingface.co/video-fm/vine_v0/tree/main/laser_model_v1.pt
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path

    # Create a configuration pointing at your pretrained weights (local file)
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        target_fps=1,
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )
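    # With use_hf_repo=False, the checkpoint is resolved from
    # local_dir/local_filename instead of being downloaded from the Hub.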

    # Method 1: Initialize the model directly
    print("Method 1: Direct model initialization")
    vine_model = VineModel(config)
    print(f"✓ Model initialized with pretrained weights from: {pretrained_vine_file}")

    # Method 2: Use the from_pretrained_vine class method
    print("\nMethod 2: Using the from_pretrained_vine class method")
    vine_model_2 = VineModel.from_pretrained_vine(
        model_path=pretrained_vine_file,
        config=config,
        epoch=0,  # Specify the epoch number
    )
    print("✓ Model loaded using from_pretrained_vine method")

    return vine_model


def example_with_huggingface_hub():
    """Example using VINE weights from the HuggingFace Hub."""
    print("\n=== Using HuggingFace Hub Weights ===")

    # Create a configuration that uses HuggingFace Hub weights
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        use_hf_repo=True,
        model_repo="video-fm/vine_v0",  # Your HF Hub model
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
    )
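    # If the repo is private, authenticate first (e.g. run `huggingface-cli login`
    # or set the HF_TOKEN environment variable) so the weight download can succeed.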

    try:
        # Initialize the model (this will try to load from the HF Hub)
        vine_model = VineModel(config)
        print("✓ Model loaded from HuggingFace Hub: video-fm/vine_v0")
        return vine_model
    except Exception as e:
        print(f"✗ Could not load from HuggingFace Hub: {e}")
        print("Make sure your model is pushed to video-fm/vine_v0")
        return None


def example_pipeline_with_pretrained():
    """Example using a pipeline with pretrained VINE weights."""
    print("\n=== Pipeline with Pretrained VINE ===")

    # Register the pipeline
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )

    # Create a configuration with your weights
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )

    # Create the model with pretrained weights
    vine_model = VineModel(config)
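    # Once registered, the task name can also be used with the generic
    # transformers.pipeline() factory (a sketch; assumes the factory forwards
    # these kwargs to VinePipeline):
    # vine_pipe = pipeline("vine-video-understanding", model=vine_model, device=0)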

    # Create the pipeline with segmentation model paths
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
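        # device=0 selects the first CUDA device; pass -1 to run on CPU
        # (the usual transformers Pipeline convention)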
        device=0,
    )
    print("✓ Pipeline created with pretrained VINE weights")

    # Example usage (requires an actual video file)
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")
        print("Example pipeline call:")
        print("results = vine_pipeline(")
        print(f"    '{demo_video}',")
        print("    categorical_keywords=['human', 'dog', 'frisbee'],")
        print("    unary_keywords=['running', 'jumping', 'sitting'],")
        print("    binary_keywords=['behind', 'chasing', 'next to'],")
        print("    debug_visualizations=True,")
        print(")")

        # Uncomment to actually run (requires the segmentation models)
        # results = vine_pipeline(
        #     demo_video,
        #     categorical_keywords=['human', 'dog', 'frisbee'],
        #     unary_keywords=['running', 'jumping', 'sitting'],
        #     binary_keywords=['behind', 'chasing', 'next to'],
        #     debug_visualizations=True,
        # )
        # print("Results:", results['summary'])

    return vine_pipeline


def example_manual_weight_loading():
    """Example of manually loading weights after model creation."""
    print("\n=== Manual Weight Loading ===")

    # Create a model with base CLIP weights.
    # No pretrained path: use the base config (no HF repo or local file configured).
    config = VineConfig()
    vine_model = VineModel(config)
    print("✓ Model created with base CLIP weights")

    model_dir = "/path/to/your/local/ensemble/model_dir"  # Replace with your model directory
    if os.path.exists(model_dir):
        success = vine_model.load_pretrained_vine_weights(model_dir, epoch=0)
        if success:
            print("✓ Successfully loaded pretrained VINE weights manually")
        else:
            print("✗ Failed to load pretrained weights")
    else:
        print(f"✗ Model directory not found: {model_dir}")

    return vine_model


def compare_model_outputs():
    """Compare outputs between base CLIP and pretrained VINE."""
    print("\n=== Comparing Model Outputs ===")

    # Create dummy data for testing
    video_frames = torch.rand(3, 224, 224, 3) * 255  # 3 random RGB frames
    video_frames = video_frames.clamp(0, 255).byte()
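    # masks and bboxes are keyed by frame index, then by object ID; a full-frame
    # mask and a slowly drifting box stand in for real tracker output here.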
    masks = {
        0: {1: torch.ones(224, 224, 1)},
        1: {1: torch.ones(224, 224, 1)},
        2: {1: torch.ones(224, 224, 1)},
    }
    bboxes = {
        0: {1: [50, 50, 150, 150]},
        1: {1: [52, 52, 152, 152]},
        2: {1: [54, 54, 154, 154]},
    }
    keywords = ['human', 'dog', 'frisbee']

    # Model 1: base CLIP
    print("Creating model with base CLIP weights...")
    config_base = VineConfig()
    model_base = VineModel(config_base)

    # Model 2: pretrained VINE (if available)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")
    if os.path.exists(model_dir):
        print("Creating model with pretrained VINE weights...")
        config_vine = VineConfig(
            use_hf_repo=False,
            local_dir=model_dir,
            local_filename=None,
        )
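        # local_filename=None: assume the loader discovers the ensemble
        # checkpoint(s) inside local_dir on its own.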
        model_vine = VineModel(config_vine)

        print("\nComparing predictions...")
        # Get predictions from both models
        with torch.no_grad():
            results_base = model_base.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3,
            )
            results_vine = model_vine.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3,
            )

        print("Base CLIP confidence scores:", results_base['confidence_scores'])
        print("Pretrained VINE confidence scores:", results_vine['confidence_scores'])
        print("✓ Successfully compared both models")
    else:
        print(f"Pretrained model not found at: {model_dir}")
        print("Skipping comparison")


if __name__ == "__main__":
    print("VINE HuggingFace Interface - Pretrained Weights Examples")
    print("=" * 60)

    try:
        # Test local pretrained weights
        model1 = example_with_local_pretrained_weights()
    except Exception as e:
        print(f"Local weights example failed: {e}")

    try:
        # Test HuggingFace Hub weights
        model2 = example_with_huggingface_hub()
    except Exception as e:
        print(f"HuggingFace Hub example failed: {e}")

    try:
        # Test the pipeline with pretrained weights
        vine_pipe = example_pipeline_with_pretrained()
    except Exception as e:
        print(f"Pipeline example failed: {e}")
    # try:
    #     # Test manual weight loading
    #     model3 = example_manual_weight_loading()
    # except Exception as e:
    #     print(f"Manual loading example failed: {e}")

    # try:
    #     # Compare model outputs
    #     compare_model_outputs()
    # except Exception as e:
    #     print(f"Comparison example failed: {e}")
| print("\n" + "=" * 60) | |
| print("Examples completed!") | |
| print("\nUsage Summary:") | |
| print("1. Configure VineConfig with `use_hf_repo` + `model_repo` for Hub models, or `use_hf_repo=False` + `local_dir`/`local_filename` for local weights") | |
| print("2. Use VineModel.from_pretrained_vine() for direct loading") | |