"""
Example usage of VINE HuggingFace interface with pretrained VINE weights

This script demonstrates how to use the VINE model with your pretrained weights
from the ensemble format or from video-fm/vine_v0.
"""

import os
import sys
import torch
from transformers import pipeline
from transformers.pipelines import PIPELINE_REGISTRY

# Set your OpenAI API key here or via environment variable
#os.environ['OPENAI_API_KEY'] = "dummy-key"

# Add the parent directory to the path to import vine_hf
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from vine_hf import VineConfig, VineModel, VinePipeline


def example_with_local_pretrained_weights():
    print("=== Using Local Pretrained VINE Weights ===")
    
    
    # Download https://huggingface.co/video-fm/vine_v0/tree/main/laser_model_v1.pt
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path
    
    
    # Create configuration with your pretrained path (local file)
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        target_fps=1,
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )
    
    # Method 1: Initialize model directly
    print("Method 1: Direct model initialization")
    vine_model = VineModel(config)
    print(f"✓ Model initialized with pretrained weights from: {pretrained_vine_file}")
    
    # Method 2: Use the from_pretrained_vine class method
    print("\nMethod 2: Using from_pretrained_vine class method")
    vine_model_2 = VineModel.from_pretrained_vine(
        model_path=pretrained_vine_file,
        config=config,
        epoch=0  # Specify epoch number
    )
    print("✓ Model loaded using from_pretrained_vine method")
    
    return vine_model


def example_with_huggingface_hub():
    """Example using VINE weights from HuggingFace Hub."""
    print("\n=== Using HuggingFace Hub Weights ===")
    
    # Create configuration to use HuggingFace Hub weights
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        use_hf_repo=True,
        model_repo="video-fm/vine_v0",  # Your HF Hub model
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
    )
    
    try:
        # Initialize model (will try to load from HF Hub)
        vine_model = VineModel(config)
        print("✓ Model loaded from HuggingFace Hub: video-fm/vine_v0")
        return vine_model
    except Exception as e:
        print(f"✗ Could not load from HuggingFace Hub: {e}")
        print("Make sure your model is pushed to video-fm/vine_v0")
        return None


def example_pipeline_with_pretrained():
    """Example using pipeline with pretrained VINE weights."""
    print("\n=== Pipeline with Pretrained VINE ===")
    
    # Register the pipeline
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )
    
    # Create configuration with your weights
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )
    
    # Create model with pretrained weights
    vine_model = VineModel(config)
    
    # Create pipeline with segmentation model paths
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
        device=0
    )
    
    print("✓ Pipeline created with pretrained VINE weights")
    
    # Example usage (would require actual video file)
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")
        print("Example pipeline call:")
        print(f"results = vine_pipeline(")
        print(f"    '{demo_video}',")
        print(f"    categorical_keywords=['human', 'dog', 'frisbee'],")
        print(f"    unary_keywords=['running', 'jumping', 'sitting'],")
        print(f"    binary_keywords=['behind', 'chasing', 'next to']")
        print(f"    debug_visualizations=True")
        print(f")")
        
        # Uncomment to actually run (requires segmentation models)
        # results = vine_pipeline(
        #     demo_video,
        #     categorical_keywords=['human', 'dog', 'frisbee'],
        #     unary_keywords=['running', 'jumping', 'sitting'],
        #     binary_keywords=['behind', 'chasing', 'next to'],
        #     debug_visualizations=True,
        # )
        # print("Results:", results['summary'])
    
    return vine_pipeline


def example_manual_weight_loading():
    """Example of manually loading weights after model creation."""
    print("\n=== Manual Weight Loading ===")
    
    # Create model with base CLIP weights
    # No pretrained path: create base config (no HF repo or local file configured)
    config = VineConfig()
    vine_model = VineModel(config)
    print("✓ Model created with base CLIP weights")
    model_dir = "/path/to/your/local/ensemble/model_dir.pt"  # Replace with your model directory
    
    if os.path.exists(model_dir):
        success = vine_model.load_pretrained_vine_weights(model_dir, epoch=0)
        if success:
            print("✓ Successfully loaded pretrained VINE weights manually")
        else:
            print("✗ Failed to load pretrained weights")
    else:
        print(f"✗ Model directory not found: {model_dir}")
    
    return vine_model


def compare_model_outputs():
    """Compare outputs between base CLIP and pretrained VINE."""
    print("\n=== Comparing Model Outputs ===")
    
    # Create dummy data for testing
    video_frames = torch.randn(3, 224, 224, 3) * 255  # 3 frames
    video_frames = video_frames.clamp(0, 255).byte()
    
    masks = {
        0: {1: torch.ones(224, 224, 1)},
        1: {1: torch.ones(224, 224, 1)},
        2: {1: torch.ones(224, 224, 1)}
    }
    
    bboxes = {
        0: {1: [50, 50, 150, 150]},
        1: {1: [52, 52, 152, 152]},
        2: {1: [54, 54, 154, 154]}
    }
    
    keywords = ['human', 'dog', 'frisbee']
    
    # Model 1: Base CLIP
    print("Creating model with base CLIP weights...")
    config_base = VineConfig()
    model_base = VineModel(config_base)
    
    # Model 2: Pretrained VINE (if available)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")
    
    if os.path.exists(model_dir):
        print("Creating model with pretrained VINE weights...")
        config_vine = VineConfig(
            use_hf_repo=False,
            local_dir=model_dir,
            local_filename=None,
        )
        model_vine = VineModel(config_vine)
        
        print("\nComparing predictions...")
        
        # Get predictions from both models
        with torch.no_grad():
            results_base = model_base.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3
            )
            
            results_vine = model_vine.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3
            )
        
        print("Base CLIP confidence scores:", results_base['confidence_scores'])
        print("Pretrained VINE confidence scores:", results_vine['confidence_scores'])
        
        print("✓ Successfully compared both models")
    else:
        print(f"Pretrained model not found at: {model_dir}")
        print("Skipping comparison")


if __name__ == "__main__":
    print("VINE HuggingFace Interface - Pretrained Weights Examples")
    print("=" * 60)
    
    try:
        # Test local pretrained weights
        model1 = example_with_local_pretrained_weights()
    except Exception as e:
        print(f"Local weights example failed: {e}")
    
    try:
        # Test HuggingFace Hub weights
        model2 = example_with_huggingface_hub()
    except Exception as e:
        print(f"HuggingFace Hub example failed: {e}")
    
    try:
        # Test pipeline with pretrained weights
        pipeline = example_pipeline_with_pretrained()
    except Exception as e:
        print(f"Pipeline example failed: {e}")
    
    # try:
    #     # Test manual weight loading
    #     #model3 = example_manual_weight_loading()
    # except Exception as e:
    #     print(f"Manual loading example failed: {e}")
    
    # try:
    #     # Compare model outputs
    #     #compare_model_outputs()
    # except Exception as e:
    #     print(f"Comparison example failed: {e}")
    
    print("\n" + "=" * 60)
    print("Examples completed!")
    print("\nUsage Summary:")
    print("1. Configure VineConfig with `use_hf_repo` + `model_repo` for Hub models, or `use_hf_repo=False` + `local_dir`/`local_filename` for local weights")
    print("2. Use VineModel.from_pretrained_vine() for direct loading")