""" Example usage of VINE HuggingFace interface with pretrained VINE weights This script demonstrates how to use the VINE model with your pretrained weights from the ensemble format or from video-fm/vine_v0. """ import os import sys import torch from transformers import pipeline from transformers.pipelines import PIPELINE_REGISTRY # Set your OpenAI API key here or via environment variable #os.environ['OPENAI_API_KEY'] = "dummy-key" # Add the parent directory to the path to import vine_hf sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from vine_hf import VineConfig, VineModel, VinePipeline def example_with_local_pretrained_weights(): print("=== Using Local Pretrained VINE Weights ===") # Download https://huggingface.co/video-fm/vine_v0/tree/main/laser_model_v1.pt pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt" # Replace with your local path # Create configuration with your pretrained path (local file) config = VineConfig( model_name="openai/clip-vit-base-patch32", segmentation_method="grounding_dino_sam2", target_fps=1, visualize=True, visualization_dir="path/to/visualization/dir", debug_visualizations=True, use_hf_repo=False, local_dir=os.path.dirname(pretrained_vine_file), local_filename=os.path.basename(pretrained_vine_file), ) # Method 1: Initialize model directly print("Method 1: Direct model initialization") vine_model = VineModel(config) print(f"✓ Model initialized with pretrained weights from: {pretrained_vine_file}") # Method 2: Use the from_pretrained_vine class method print("\nMethod 2: Using from_pretrained_vine class method") vine_model_2 = VineModel.from_pretrained_vine( model_path=pretrained_vine_file, config=config, epoch=0 # Specify epoch number ) print("✓ Model loaded using from_pretrained_vine method") return vine_model def example_with_huggingface_hub(): """Example using VINE weights from HuggingFace Hub.""" print("\n=== Using HuggingFace Hub Weights ===") # Create configuration to use HuggingFace Hub weights config = VineConfig( model_name="openai/clip-vit-base-patch32", use_hf_repo=True, model_repo="video-fm/vine_v0", # Your HF Hub model segmentation_method="grounding_dino_sam2", visualize=True, visualization_dir="path/to/visualization/dir", debug_visualizations=True, ) try: # Initialize model (will try to load from HF Hub) vine_model = VineModel(config) print("✓ Model loaded from HuggingFace Hub: video-fm/vine_v0") return vine_model except Exception as e: print(f"✗ Could not load from HuggingFace Hub: {e}") print("Make sure your model is pushed to video-fm/vine_v0") return None def example_pipeline_with_pretrained(): """Example using pipeline with pretrained VINE weights.""" print("\n=== Pipeline with Pretrained VINE ===") # Register the pipeline PIPELINE_REGISTRY.register_pipeline( "vine-video-understanding", pipeline_class=VinePipeline, pt_model=VineModel, type="multimodal", ) # Create configuration with your weights pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt" # Replace with your local path config = VineConfig( model_name="openai/clip-vit-base-patch32", segmentation_method="grounding_dino_sam2", visualize=True, visualization_dir="path/to/visualization/dir", debug_visualizations=True, use_hf_repo=False, local_dir=os.path.dirname(pretrained_vine_file), local_filename=os.path.basename(pretrained_vine_file), ) # Create model with pretrained weights vine_model = VineModel(config) # Create pipeline with segmentation model paths vine_pipeline = VinePipeline( model=vine_model, tokenizer=None, 
sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml", sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt", gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py", gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth", device=0 ) print("✓ Pipeline created with pretrained VINE weights") # Example usage (would require actual video file) demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4") if os.path.exists(demo_video): print(f"Found demo video: {demo_video}") print("Example pipeline call:") print(f"results = vine_pipeline(") print(f" '{demo_video}',") print(f" categorical_keywords=['human', 'dog', 'frisbee'],") print(f" unary_keywords=['running', 'jumping', 'sitting'],") print(f" binary_keywords=['behind', 'chasing', 'next to']") print(f" debug_visualizations=True") print(f")") # Uncomment to actually run (requires segmentation models) # results = vine_pipeline( # demo_video, # categorical_keywords=['human', 'dog', 'frisbee'], # unary_keywords=['running', 'jumping', 'sitting'], # binary_keywords=['behind', 'chasing', 'next to'], # debug_visualizations=True, # ) # print("Results:", results['summary']) return vine_pipeline def example_manual_weight_loading(): """Example of manually loading weights after model creation.""" print("\n=== Manual Weight Loading ===") # Create model with base CLIP weights # No pretrained path: create base config (no HF repo or local file configured) config = VineConfig() vine_model = VineModel(config) print("✓ Model created with base CLIP weights") model_dir = "/path/to/your/local/ensemble/model_dir.pt" # Replace with your model directory if os.path.exists(model_dir): success = vine_model.load_pretrained_vine_weights(model_dir, epoch=0) if success: print("✓ Successfully loaded pretrained VINE weights manually") else: print("✗ Failed to load pretrained weights") else: print(f"✗ Model directory not found: {model_dir}") return vine_model def compare_model_outputs(): """Compare outputs between base CLIP and pretrained VINE.""" print("\n=== Comparing Model Outputs ===") # Create dummy data for testing video_frames = torch.randn(3, 224, 224, 3) * 255 # 3 frames video_frames = video_frames.clamp(0, 255).byte() masks = { 0: {1: torch.ones(224, 224, 1)}, 1: {1: torch.ones(224, 224, 1)}, 2: {1: torch.ones(224, 224, 1)} } bboxes = { 0: {1: [50, 50, 150, 150]}, 1: {1: [52, 52, 152, 152]}, 2: {1: [54, 54, 154, 154]} } keywords = ['human', 'dog', 'frisbee'] # Model 1: Base CLIP print("Creating model with base CLIP weights...") config_base = VineConfig() model_base = VineModel(config_base) # Model 2: Pretrained VINE (if available) data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data")) model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10") if os.path.exists(model_dir): print("Creating model with pretrained VINE weights...") config_vine = VineConfig( use_hf_repo=False, local_dir=model_dir, local_filename=None, ) model_vine = VineModel(config_vine) print("\nComparing predictions...") # Get predictions from both models with torch.no_grad(): results_base = model_base.predict( video_frames=video_frames, masks=masks, bboxes=bboxes, categorical_keywords=keywords, return_top_k=3 ) results_vine = model_vine.predict( video_frames=video_frames, masks=masks, bboxes=bboxes, categorical_keywords=keywords, return_top_k=3 ) print("Base CLIP confidence scores:", results_base['confidence_scores']) print("Pretrained VINE confidence scores:", 


if __name__ == "__main__":
    print("VINE HuggingFace Interface - Pretrained Weights Examples")
    print("=" * 60)

    try:
        # Test local pretrained weights
        model1 = example_with_local_pretrained_weights()
    except Exception as e:
        print(f"Local weights example failed: {e}")

    try:
        # Test HuggingFace Hub weights
        model2 = example_with_huggingface_hub()
    except Exception as e:
        print(f"HuggingFace Hub example failed: {e}")

    try:
        # Test the pipeline with pretrained weights
        # (renamed from `pipeline` to avoid shadowing the transformers import)
        vine_pipeline = example_pipeline_with_pretrained()
    except Exception as e:
        print(f"Pipeline example failed: {e}")

    # Uncomment to test manual weight loading:
    # try:
    #     model3 = example_manual_weight_loading()
    # except Exception as e:
    #     print(f"Manual loading example failed: {e}")

    # Uncomment to compare model outputs:
    # try:
    #     compare_model_outputs()
    # except Exception as e:
    #     print(f"Comparison example failed: {e}")

    print("\n" + "=" * 60)
    print("Examples completed!")
    print("\nUsage Summary:")
    print("1. Configure VineConfig with `use_hf_repo` + `model_repo` for Hub models,")
    print("   or with `use_hf_repo=False` + `local_dir`/`local_filename` for local weights")
    print("2. Use VineModel.from_pretrained_vine() for direct loading")
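
    # A minimal end-to-end sketch, in comments (assumes the VINE weights and
    # segmentation checkpoints exist at the paths configured above):
    #
    #   model = VineModel.from_pretrained_vine(
    #       model_path="/path/to/laser_model_v1.pt", config=VineConfig(), epoch=0
    #   )
    #   pipe = VinePipeline(model=model, tokenizer=None, device=0)
    #   results = pipe("video.mp4", categorical_keywords=['human', 'dog', 'frisbee'])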