| """ | |
| Example usage of VINE HuggingFace interface with pretrained VINE weights | |
| This script demonstrates how to use the VINE model with your pretrained weights | |
| from the ensemble format or from video-fm/vine_v0. | |
| """ | |
| import os | |
| import sys | |
| import torch | |
| from transformers import pipeline | |
| from transformers.pipelines import PIPELINE_REGISTRY | |
| # Set your OpenAI API key here or via environment variable | |
| #os.environ['OPENAI_API_KEY'] = "dummy-key" | |
| # Add the parent directory to the path to import vine_hf | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from vine_hf import VineConfig, VineModel, VinePipeline | |


def example_with_local_pretrained_weights():
    """Example using local pretrained VINE weights."""
    print("=== Using Local Pretrained VINE Weights ===")

    # Download from: https://huggingface.co/video-fm/vine_v0/tree/main/laser_model_v1.pt
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path

    # Create a configuration pointing at your pretrained weights (local file)
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        target_fps=1,
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )
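    # With use_hf_repo=False, the checkpoint is resolved from
    # local_dir/local_filename instead of being downloaded from the Hub.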

    # Method 1: Initialize the model directly
    print("Method 1: Direct model initialization")
    vine_model = VineModel(config)
    print(f"✓ Model initialized with pretrained weights from: {pretrained_vine_file}")

    # Method 2: Use the from_pretrained_vine class method
    print("\nMethod 2: Using the from_pretrained_vine class method")
    vine_model_2 = VineModel.from_pretrained_vine(
        model_path=pretrained_vine_file,
        config=config,
        epoch=0,  # Specify the epoch number
    )
    print("✓ Model loaded using from_pretrained_vine method")

    return vine_model


def example_with_huggingface_hub():
    """Example using VINE weights from the HuggingFace Hub."""
    print("\n=== Using HuggingFace Hub Weights ===")

    # Create a configuration that uses HuggingFace Hub weights
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        use_hf_repo=True,
        model_repo="video-fm/vine_v0",  # Your HF Hub model
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
    )
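    # If the repo is private, authenticate first (e.g. run `huggingface-cli login`
    # or set the HF_TOKEN environment variable) so the weight download can succeed.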

    try:
        # Initialize the model (this will try to load from the HF Hub)
        vine_model = VineModel(config)
        print("✓ Model loaded from HuggingFace Hub: video-fm/vine_v0")
        return vine_model
    except Exception as e:
        print(f"✗ Could not load from HuggingFace Hub: {e}")
        print("Make sure your model is pushed to video-fm/vine_v0")
        return None


def example_pipeline_with_pretrained():
    """Example using a pipeline with pretrained VINE weights."""
    print("\n=== Pipeline with Pretrained VINE ===")

    # Register the pipeline
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )

    # Create a configuration with your weights
    pretrained_vine_file = "/path/to/your/local/laser_model_v1.pt"  # Replace with your local path
    config = VineConfig(
        model_name="openai/clip-vit-base-patch32",
        segmentation_method="grounding_dino_sam2",
        visualize=True,
        visualization_dir="path/to/visualization/dir",
        debug_visualizations=True,
        use_hf_repo=False,
        local_dir=os.path.dirname(pretrained_vine_file),
        local_filename=os.path.basename(pretrained_vine_file),
    )

    # Create the model with pretrained weights
    vine_model = VineModel(config)
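    # Once registered, the task name can also be used with the generic
    # transformers.pipeline() factory (a sketch; assumes the factory forwards
    # these kwargs to VinePipeline):
    # vine_pipe = pipeline("vine-video-understanding", model=vine_model, device=0)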

    # Create the pipeline with segmentation model paths
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
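        # device=0 selects the first CUDA device; pass -1 to run on CPU
        # (the usual transformers Pipeline convention)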
        device=0,
    )
    print("✓ Pipeline created with pretrained VINE weights")

    # Example usage (requires an actual video file)
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")
        print("Example pipeline call:")
        print("results = vine_pipeline(")
        print(f"    '{demo_video}',")
        print("    categorical_keywords=['human', 'dog', 'frisbee'],")
        print("    unary_keywords=['running', 'jumping', 'sitting'],")
        print("    binary_keywords=['behind', 'chasing', 'next to'],")
        print("    debug_visualizations=True,")
        print(")")

        # Uncomment to actually run (requires the segmentation models)
        # results = vine_pipeline(
        #     demo_video,
        #     categorical_keywords=['human', 'dog', 'frisbee'],
        #     unary_keywords=['running', 'jumping', 'sitting'],
        #     binary_keywords=['behind', 'chasing', 'next to'],
        #     debug_visualizations=True,
        # )
        # print("Results:", results['summary'])

    return vine_pipeline


def example_manual_weight_loading():
    """Example of manually loading weights after model creation."""
    print("\n=== Manual Weight Loading ===")

    # Create a model with base CLIP weights.
    # No pretrained path: use the base config (no HF repo or local file configured).
    config = VineConfig()
    vine_model = VineModel(config)
    print("✓ Model created with base CLIP weights")

    model_dir = "/path/to/your/local/ensemble/model_dir"  # Replace with your model directory
    if os.path.exists(model_dir):
        success = vine_model.load_pretrained_vine_weights(model_dir, epoch=0)
        if success:
            print("✓ Successfully loaded pretrained VINE weights manually")
        else:
            print("✗ Failed to load pretrained weights")
    else:
        print(f"✗ Model directory not found: {model_dir}")

    return vine_model


def compare_model_outputs():
    """Compare outputs between base CLIP and pretrained VINE."""
    print("\n=== Comparing Model Outputs ===")

    # Create dummy data for testing
    video_frames = torch.rand(3, 224, 224, 3) * 255  # 3 random RGB frames
    video_frames = video_frames.clamp(0, 255).byte()
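    # masks and bboxes are keyed by frame index, then by object ID; a full-frame
    # mask and a slowly drifting box stand in for real tracker output here.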
    masks = {
        0: {1: torch.ones(224, 224, 1)},
        1: {1: torch.ones(224, 224, 1)},
        2: {1: torch.ones(224, 224, 1)},
    }
    bboxes = {
        0: {1: [50, 50, 150, 150]},
        1: {1: [52, 52, 152, 152]},
        2: {1: [54, 54, 154, 154]},
    }
    keywords = ['human', 'dog', 'frisbee']

    # Model 1: base CLIP
    print("Creating model with base CLIP weights...")
    config_base = VineConfig()
    model_base = VineModel(config_base)

    # Model 2: pretrained VINE (if available)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")
    if os.path.exists(model_dir):
        print("Creating model with pretrained VINE weights...")
        config_vine = VineConfig(
            use_hf_repo=False,
            local_dir=model_dir,
            local_filename=None,
        )
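        # local_filename=None: assume the loader discovers the ensemble
        # checkpoint(s) inside local_dir on its own.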
        model_vine = VineModel(config_vine)

        print("\nComparing predictions...")
        # Get predictions from both models
        with torch.no_grad():
            results_base = model_base.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3,
            )
            results_vine = model_vine.predict(
                video_frames=video_frames,
                masks=masks,
                bboxes=bboxes,
                categorical_keywords=keywords,
                return_top_k=3,
            )

        print("Base CLIP confidence scores:", results_base['confidence_scores'])
        print("Pretrained VINE confidence scores:", results_vine['confidence_scores'])
        print("✓ Successfully compared both models")
    else:
        print(f"Pretrained model not found at: {model_dir}")
        print("Skipping comparison")


if __name__ == "__main__":
    print("VINE HuggingFace Interface - Pretrained Weights Examples")
    print("=" * 60)

    try:
        # Test local pretrained weights
        model1 = example_with_local_pretrained_weights()
    except Exception as e:
        print(f"Local weights example failed: {e}")

    try:
        # Test HuggingFace Hub weights
        model2 = example_with_huggingface_hub()
    except Exception as e:
        print(f"HuggingFace Hub example failed: {e}")

    try:
        # Test the pipeline with pretrained weights
        vine_pipe = example_pipeline_with_pretrained()
    except Exception as e:
        print(f"Pipeline example failed: {e}")
    # try:
    #     # Test manual weight loading
    #     model3 = example_manual_weight_loading()
    # except Exception as e:
    #     print(f"Manual loading example failed: {e}")

    # try:
    #     # Compare model outputs
    #     compare_model_outputs()
    # except Exception as e:
    #     print(f"Comparison example failed: {e}")
| print("\n" + "=" * 60) | |
| print("Examples completed!") | |
| print("\nUsage Summary:") | |
| print("1. Configure VineConfig with `use_hf_repo` + `model_repo` for Hub models, or `use_hf_repo=False` + `local_dir`/`local_filename` for local weights") | |
| print("2. Use VineModel.from_pretrained_vine() for direct loading") | |