| """ | |
| Example demonstrating how to load and use VINE ensemble weights | |
| This script shows the correct way to load your pretrained VINE ensemble weights | |
| and use them with the HuggingFace interface, based on the actual inference.py workflow. | |
| """ | |
import os
import sys
from pathlib import Path

import torch
import numpy as np
from transformers.pipelines import PIPELINE_REGISTRY

# os.environ["OPENAI_API_KEY"] = "dummy-key"  # Set your OpenAI API key here or via an environment variable

# Add src/ to sys.path so LASER, video-sam2, and GroundingDINO are importable
current_dir = Path(__file__).resolve().parent
src_dir = current_dir.parent / "src"
if src_dir.is_dir() and str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

from vine_hf import VineConfig, VineModel, VinePipeline
from laser.loading import load_video


def example_load_ensemble_weights():
    """Example of loading ensemble weights correctly."""
    print("=== Loading Ensemble VINE Weights ===")

    # Path to your ensemble model (adjust this to your actual path)
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    print(f"Looking for ensemble weights in: {model_dir}")

    if os.path.exists(model_dir):
        print("✓ Model directory found")

        # List available model files
        model_files = [f for f in os.listdir(model_dir) if f.endswith('.model')]
        print(f"Available model files: {model_files}")

        if model_files:
            # Create configuration with ensemble path (local directory with .model files)
            config = VineConfig(
                segmentation_method="grounding_dino_sam2",
                use_hf_repo=False,
                local_dir=model_dir,
                local_filename=None,
            )

            print("Creating VINE model with ensemble weights...")
            vine_model = VineModel(config)
            print("✓ VINE model created with ensemble weights!")
            return vine_model
        else:
            print("✗ No .model files found in directory")
            return None
    else:
        print(f"✗ Model directory not found: {model_dir}")
        print("Please adjust the path to point to your ensemble weights")
        return None
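

# Hypothetical helper (not part of vine_hf): list which epochs are available in
# an ensemble directory, assuming the `<model_name>.<epoch>.model` naming
# convention used by inference.py's load_model() (shown further below).
def list_available_epochs(model_dir):
    """Return the sorted epoch numbers found among model_dir's .model files."""
    epochs = []
    for filename in os.listdir(model_dir):
        parts = filename.split(".")
        # e.g. "ensemble-2025-02-10-14-57-22.0.model" -> epoch 0
        if len(parts) >= 3 and parts[-1] == "model" and parts[-2].isdigit():
            epochs.append(int(parts[-2]))
    return sorted(set(epochs))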


def example_direct_ensemble_loading():
    """Example of loading ensemble weights using from_pretrained_vine."""
    print("\n=== Direct Ensemble Loading ===")

    # Path to the ensemble directory
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    if os.path.exists(model_dir):
        try:
            # Use the class method for direct loading
            vine_model = VineModel.from_pretrained_vine(
                model_path=model_dir,
                epoch=0,  # Load epoch 0
            )
            print("✓ Model loaded using from_pretrained_vine!")
            return vine_model
        except Exception as e:
            print(f"✗ Error loading with from_pretrained_vine: {e}")
            return None
    else:
        print(f"✗ Model directory not found: {model_dir}")
        return None


def example_compare_original_vs_hf():
    """Compare the original inference.py approach with the HuggingFace interface."""
    print("\n=== Comparing Original vs HuggingFace Interface ===")

    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")
    model_name = "ensemble-2025-02-10-14-57-22"
    epoch = 0

    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        return

    print("Original approach (from inference.py):")
    print("```python")
    print("def load_model(model_dir, model_name, epoch, device):")
    print("    model_name = model_name + f'.{epoch}.model'")
    print("    predicate_model = torch.load(os.path.join(model_dir, model_name), map_location=device, weights_only=False)")
    print("    return predicate_model")
    print("")
    print("predicate_model = load_model(model_dir, model_name, epoch, device)")
    print("```")

    print("\nNew HuggingFace approach:")
    print("```python")
    print("config = VineConfig(pretrained_vine_path=model_dir)")
    print("vine_model = VineModel(config)")
    print("# or")
    print("vine_model = VineModel.from_pretrained_vine(model_dir, epoch=0)")
    print("```")

    # Try to load with both approaches if possible
    try:
        # Original approach
        def load_model(model_dir, model_name, epoch, device):
            model_name = model_name + f'.{epoch}.model'
            model_path = os.path.join(model_dir, model_name)
            if os.path.exists(model_path):
                return torch.load(model_path, map_location=device, weights_only=False)
            else:
                print(f"Model file not found: {model_path}")
                return None

        device = "cuda" if torch.cuda.is_available() else "cpu"
        original_model = load_model(model_dir, model_name, epoch, device)

        if original_model:
            print(f"✓ Original model loaded: {type(original_model)}")
            print(f"  Has clip_cate_model: {hasattr(original_model, 'clip_cate_model')}")
            print(f"  Has clip_unary_model: {hasattr(original_model, 'clip_unary_model')}")
            print(f"  Has clip_binary_model: {hasattr(original_model, 'clip_binary_model')}")

        # HuggingFace approach
        vine_model = VineModel.from_pretrained_vine(model_dir, epoch=epoch)
        if vine_model:
            print(f"✓ HuggingFace model loaded: {type(vine_model)}")
            print(f"  Has clip_cate_model: {hasattr(vine_model, 'clip_cate_model')}")
            print(f"  Has clip_unary_model: {hasattr(vine_model, 'clip_unary_model')}")
            print(f"  Has clip_binary_model: {hasattr(vine_model, 'clip_binary_model')}")
            print("\n✓ Both approaches work! The HuggingFace interface successfully loads ensemble weights.")
    except Exception as e:
        print(f"Error in comparison: {e}")


def example_ensemble_with_pipeline():
    """Example using ensemble weights with the pipeline."""
    print("\n=== Using Ensemble Weights with Pipeline ===")

    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data"))
    model_dir = os.path.join(data_dir, "LLaVA-Video-178K-v2/models/ensemble-02-10")

    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        return None

    # Register the custom pipeline
    PIPELINE_REGISTRY.register_pipeline(
        "vine-video-understanding",
        pipeline_class=VinePipeline,
        pt_model=VineModel,
        type="multimodal",
    )

    # Create model with ensemble weights (local directory)
    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        use_hf_repo=False,
        local_dir=model_dir,
        local_filename=None,
    )
    vine_model = VineModel(config)

    # Create pipeline with segmentation model paths
    vine_pipeline = VinePipeline(
        model=vine_model,
        tokenizer=None,
        # SAM2 configuration
        sam_config_path="path/to/sam2/configs/sam2.1_hiera_b+.yaml",
        sam_checkpoint_path="path/to/sam2/checkpoints/sam2.1_hiera_base_plus.pt",
        # GroundingDINO configuration
        gd_config_path="path/to/GroundingDINO/config/GroundingDINO_SwinT_OGC.py",
        gd_checkpoint_path="path/to/GroundingDINO/checkpoints/groundingdino_swint_ogc.pth",
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    print("✓ Pipeline created with ensemble VINE weights")

    # Check for demo video
    demo_video = os.path.join(os.path.dirname(__file__), "../demo/videos/v1.mp4")
    if os.path.exists(demo_video):
        print(f"Found demo video: {demo_video}")

        # Use the same keywords as in the original inference.py
        categorical_keywords = ['human', 'dog', 'frisbee']
        unary_keywords = ['running', 'jumping', 'catching', 'throwing']
        binary_keywords = ['behind', 'bite', 'front', 'jump over', 'right', 'left']

        print("Example pipeline usage:")
        print("```python")
        print("results = vine_pipeline(")
        print(f"    '{demo_video}',")
        print(f"    categorical_keywords={categorical_keywords},")
        print(f"    unary_keywords={unary_keywords},")
        print(f"    binary_keywords={binary_keywords},")
        print("    segmentation_method='grounding_dino_sam2'")
        print(")")
        print("```")

        # Uncomment to actually run (requires segmentation models):
        # try:
        #     results = vine_pipeline(
        #         demo_video,
        #         categorical_keywords=categorical_keywords,
        #         unary_keywords=unary_keywords,
        #         binary_keywords=binary_keywords,
        #         segmentation_method='grounding_dino_sam2'
        #     )
        #     print("Results:", results['summary'])
        # except Exception as e:
        #     print(f"Pipeline execution failed: {e}")
        #     print("This is expected if segmentation models are not set up")

    return vine_pipeline


def demonstrate_weight_transfer():
    """Demonstrate how weights are transferred from ensemble to HuggingFace format."""
    print("\n=== Weight Transfer Demonstration ===")

    print("The ensemble model structure (PredicateModel):")
    print("- clip_cate_model: CLIP model for categorical classification")
    print("- clip_unary_model: CLIP model for unary predicates")
    print("- clip_binary_model: CLIP model for binary relations")
    print("- clip_tokenizer: Tokenizer for text processing")
    print("- clip_processor: Processor for image processing")

    print("\nWeight transfer process:")
    print("1. Load the ensemble model with torch.load()")
    print("2. Initialize base CLIP models in HuggingFace format")
    print("3. Transfer each state_dict from the ensemble to the HuggingFace models:")
    print("   - ensemble.clip_cate_model → hf.clip_cate_model")
    print("   - ensemble.clip_unary_model → hf.clip_unary_model")
    print("   - ensemble.clip_binary_model → hf.clip_binary_model")
    print("4. Transfer the tokenizer and processor")

    print("\nThis preserves all your fine-tuned weights while making them HuggingFace compatible!")


def troubleshooting_guide():
    """Provide a troubleshooting guide for common issues."""
    print("\n=== Troubleshooting Guide ===")

    print("Common issues:")
    print("1. 'No model file found for epoch X'")
    print("   → Check that .model files exist in the directory")
    print("   → Verify the epoch number is correct")
    print("   → List files: ls /path/to/model/dir/*.model")

    print("\n2. 'Error loading VINE weights'")
    print("   → Check file permissions")
    print("   → Verify the model file is not corrupted")
    print("   → Try loading with torch.load() directly first")

    print("\n3. 'CLIP model mismatch'")
    print("   → Ensure config.model_name matches the base model used in training")

    print("\n4. 'Device mismatch errors'")
    print("   → Models are loaded to CPU first, then moved to the target device")
    print("   → Check CUDA availability with torch.cuda.is_available()")

    print("\nDebugging steps:")
    print("1. Load the ensemble model directly:")
    print("   model = torch.load('path/to/model.0.model', map_location='cpu', weights_only=False)")
    print("2. Check model attributes:")
    print("   print(dir(model))")
    print("3. Verify state_dict keys:")
    print("   print(model.clip_cate_model.state_dict().keys())")


if __name__ == "__main__":
    print("VINE Ensemble Weights Loading Examples")
    print("=" * 50)

    # Test ensemble weight loading
    try:
        model1 = example_load_ensemble_weights()
    except Exception as e:
        print(f"Ensemble loading example failed: {e}")

    try:
        model2 = example_direct_ensemble_loading()
    except Exception as e:
        print(f"Direct loading example failed: {e}")

    # Compare approaches
    try:
        example_compare_original_vs_hf()
    except Exception as e:
        print(f"Comparison example failed: {e}")

    # Test pipeline with ensemble weights
    try:
        pipeline = example_ensemble_with_pipeline()
    except Exception as e:
        print(f"Pipeline example failed: {e}")

    # Educational content
    demonstrate_weight_transfer()
    troubleshooting_guide()

    print("\n" + "=" * 50)
    print("Key Points:")
    print("1. AutoModel.from_pretrained() won't work with raw .model ensemble checkpoints")
    print("2. Use torch.load() to load the ensemble, then transfer weights")
    print("3. The HuggingFace interface preserves your fine-tuned weights")
    print("4. Specify pretrained_vine_path in VineConfig to auto-load weights")
    print("5. Use VineModel.from_pretrained_vine() for direct loading")