Spaces:

ianshank
/

phi35-moe-demo

Sleeping

App Files Files Community

phi35-moe-demo / tests /contract /test_huggingface_api.py

ianshank

🚀 Deploy robust modular solution with comprehensive testing and CPU/GPU support

6510698 verified 3 months ago

raw

history blame contribute delete

9.94 kB

	"""
	Contract tests for Hugging Face API interactions.

	These tests verify that our code correctly interacts with external HF services.
	They can be run against real APIs or mocked for CI/CD.
	"""

	import pytest
	import os
	from unittest.mock import patch, MagicMock
	import sys
	from pathlib import Path

	# This file sits three directory levels below the repository root; put that
	# root at the front of sys.path so the `scripts` and `app` imports resolve.
	project_root = Path(__file__).parent.parent.parent
	sys.path.insert(0, os.fspath(project_root))

	from scripts.select_revision import RevisionSelector
	from huggingface_hub import HfApi
	from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError


	class TestHuggingFaceAPIContract:
	    """Contract tests for the Hugging Face Hub calls made by ``RevisionSelector``.

	    ``test_hf_api_connection`` talks to the real Hub and is marked
	    ``integration``; the other tests substitute mocks or temporary files.
	    """

	    def setup_method(self):
	        """Create a fresh selector for the test model before each test."""
	        self.test_model_id = "microsoft/Phi-3.5-MoE-instruct"
	        self.selector = RevisionSelector(self.test_model_id)

	    def _is_cpu_safe_with_source(self, revision, source_text):
	        """Run ``is_cpu_safe_revision`` while ``open(...).read()`` yields *source_text*.

	        Only the ``with open(...) as f: f.read()`` access pattern is stubbed.
	        """
	        with patch('builtins.open', create=True) as fake_open:
	            fake_open.return_value.__enter__.return_value.read.return_value = source_text
	            return self.selector.is_cpu_safe_revision(revision)

	    @staticmethod
	    def _make_temp_env_file(initial_text=""):
	        """Create a closed temporary ``.env`` file holding *initial_text*.

	        Returns the file's ``Path``; the caller is responsible for deleting it.
	        """
	        import tempfile

	        with tempfile.NamedTemporaryFile(mode='w+', suffix='.env', delete=False) as handle:
	            handle.write(initial_text)
	        return Path(handle.name)

	    @pytest.mark.integration
	    def test_hf_api_connection(self):
	        """Test that we can connect to HF API (requires internet)."""
	        api = HfApi()

	        # Only the network call may trigger a skip. The assertions live outside
	        # the try block because AssertionError is an Exception subclass: inside
	        # it, a failed assertion would be reported as a skip.
	        try:
	            model_info = api.model_info(self.test_model_id)
	        except Exception as e:
	            pytest.skip(f"Cannot connect to HF API: {e}")

	        assert model_info is not None
	        assert model_info.modelId == self.test_model_id

	    @patch('huggingface_hub.HfApi.list_repo_commits')
	    def test_get_recent_commits_contract(self, mock_list_commits):
	        """Test contract for getting recent commits."""
	        # Three commits are available but only two are requested.
	        mock_list_commits.return_value = [
	            MagicMock(commit_id="abc123"),
	            MagicMock(commit_id="def456"),
	            MagicMock(commit_id="ghi789"),
	        ]

	        commits = self.selector.get_recent_commits(max_commits=2)

	        # Verify API was called correctly
	        mock_list_commits.assert_called_once_with(
	            repo_id=self.test_model_id,
	            repo_type="model",
	        )

	        # Verify we got the expected commits, truncated to max_commits
	        assert len(commits) == 2
	        assert commits == ["abc123", "def456"]

	    @patch('huggingface_hub.HfApi.list_repo_commits')
	    def test_get_recent_commits_api_error(self, mock_list_commits):
	        """Test handling of API errors when getting commits."""
	        mock_list_commits.side_effect = RepositoryNotFoundError("Model not found")

	        commits = self.selector.get_recent_commits()

	        # Should return empty list on error
	        assert commits == []

	    # NOTE(review): patching 'huggingface_hub.hf_hub_download' only takes effect
	    # if scripts.select_revision looks the function up on the huggingface_hub
	    # module at call time - confirm against that module's import style.
	    @patch('huggingface_hub.hf_hub_download')
	    def test_is_cpu_safe_revision_contract(self, mock_download):
	        """Test contract for checking CPU-safe revisions."""
	        mock_download.return_value = "/tmp/test_modeling.py"

	        # Modeling source without flash_attn imports. Built from explicit lines
	        # so the text does not depend on this file's indentation.
	        safe_content = (
	            "\n"
	            "import torch\n"
	            "import torch.nn as nn\n"
	            "from transformers import PreTrainedModel\n"
	            "\n"
	            "class TestModel(PreTrainedModel):\n"
	            "    def __init__(self, config):\n"
	            "        super().__init__(config)\n"
	            "        # No flash_attn imports here\n"
	        )

	        result = self._is_cpu_safe_with_source("abc123", safe_content)

	        # Verify download was called correctly
	        mock_download.assert_called_once_with(
	            repo_id=self.test_model_id,
	            filename="modeling_phimoe.py",
	            revision="abc123",
	            repo_type="model",
	            cache_dir=".cache",
	        )

	        assert result is True

	    @patch('huggingface_hub.hf_hub_download')
	    def test_is_cpu_safe_revision_with_flash_attn(self, mock_download):
	        """Test detection of flash_attn imports."""
	        mock_download.return_value = "/tmp/test_modeling.py"

	        # Modeling source WITH a flash_attn import
	        unsafe_content = (
	            "\n"
	            "import torch\n"
	            "import torch.nn as nn\n"
	            "import flash_attn\n"
	            "from transformers import PreTrainedModel\n"
	            "\n"
	            "class TestModel(PreTrainedModel):\n"
	            "    def __init__(self, config):\n"
	            "        super().__init__(config)\n"
	        )

	        result = self._is_cpu_safe_with_source("abc123", unsafe_content)

	        assert result is False

	    @patch('huggingface_hub.hf_hub_download')
	    def test_is_cpu_safe_revision_download_error(self, mock_download):
	        """Test handling of download errors."""
	        mock_download.side_effect = RevisionNotFoundError("Revision not found")

	        result = self.selector.is_cpu_safe_revision("nonexistent")

	        # Should return False on download error
	        assert result is False

	    def test_save_revision_to_env_contract(self):
	        """Test contract for saving revision to .env file."""
	        test_revision = "abc123def456"

	        # Start from an existing, empty temporary .env file
	        tmp_path = self._make_temp_env_file()

	        try:
	            # Redirect the module-level ENV_FILE to the temporary file
	            with patch('scripts.select_revision.ENV_FILE', tmp_path):
	                self.selector.save_revision_to_env(test_revision)

	                # Verify file was written correctly
	                content = tmp_path.read_text()
	                assert f"HF_REVISION={test_revision}" in content
	        finally:
	            # Clean up
	            if tmp_path.exists():
	                tmp_path.unlink()

	    def test_save_revision_to_env_existing_file(self):
	        """Test saving revision when .env file already exists."""
	        test_revision = "new123revision"

	        # Explicit lines keep stray indentation out of the fixture.
	        existing_content = (
	            "\n"
	            "# Existing env file\n"
	            "SOME_VAR=value\n"
	            "HF_REVISION=old123revision\n"
	            "OTHER_VAR=other_value\n"
	        )

	        tmp_path = self._make_temp_env_file(existing_content)

	        try:
	            with patch('scripts.select_revision.ENV_FILE', tmp_path):
	                self.selector.save_revision_to_env(test_revision)

	                content = tmp_path.read_text()

	                # Should have new revision
	                assert f"HF_REVISION={test_revision}" in content
	                # Should not have old revision
	                assert "HF_REVISION=old123revision" not in content
	                # Should preserve other variables
	                assert "SOME_VAR=value" in content
	                assert "OTHER_VAR=other_value" in content
	        finally:
	            if tmp_path.exists():
	                tmp_path.unlink()


	class TestTransformersContract:
	    """Check the arguments ``ModelLoader`` forwards to ``transformers``."""

	    @staticmethod
	    def _loader_with_cpu_config():
	        """Return a ``ModelLoader`` whose config is a minimal CPU ``ModelConfig``.

	        Project modules are imported here, at call time, so the ``transformers``
	        patch applied by the calling test is already active.
	        """
	        from app.model_loader import ModelLoader
	        loader = ModelLoader()

	        from app.config.model_config import ModelConfig
	        import torch

	        settings = dict(
	            model_id="test/model",
	            revision="main",
	            dtype=torch.float32,
	            device_map="cpu",
	            attn_implementation="eager",
	            low_cpu_mem_usage=True,
	            trust_remote_code=True,
	        )
	        loader.config = ModelConfig(**settings)
	        return loader

	    @patch('transformers.AutoTokenizer.from_pretrained')
	    def test_tokenizer_loading_contract(self, mock_tokenizer):
	        """Test contract for tokenizer loading."""
	        fake_tokenizer = MagicMock()
	        mock_tokenizer.return_value = fake_tokenizer

	        loader = self._loader_with_cpu_config()

	        result = loader.load_tokenizer()

	        # The tokenizer factory must be called exactly once with these arguments
	        mock_tokenizer.assert_called_once_with(
	            "test/model",
	            trust_remote_code=True,
	            revision="main",
	        )

	        assert result is True
	        assert loader.tokenizer == fake_tokenizer

	    @patch('transformers.AutoModelForCausalLM.from_pretrained')
	    def test_model_loading_contract(self, mock_model):
	        """Test contract for model loading."""
	        fake_model = MagicMock()
	        # eval() returns the same mock, so either object compares equal below
	        fake_model.eval.return_value = fake_model
	        mock_model.return_value = fake_model

	        loader = self._loader_with_cpu_config()
	        import torch

	        result = loader.load_model()

	        # The model factory must be called exactly once with these arguments
	        expected_kwargs = dict(
	            trust_remote_code=True,
	            revision="main",
	            attn_implementation="eager",
	            dtype=torch.float32,  # Should use dtype, not torch_dtype
	            device_map="cpu",
	            low_cpu_mem_usage=True,
	        )
	        mock_model.assert_called_once_with("test/model", **expected_kwargs)

	        # Verify eval() was called
	        fake_model.eval.assert_called_once()

	        assert result is True
	        assert loader.model == fake_model


	if __name__ == "__main__":
	    # pytest.main() returns the run's exit code; pass it to sys.exit so that
	    # running this file directly reports test failures via the process status.
	    sys.exit(pytest.main([__file__]))