| | |
"""
Monitor the Mistral Nemo service startup and run tests when ready.
"""
| | import time |
| | import requests |
| | import json |
| | import sys |
| |
|
def check_service_health():
    """Report whether the local service's health endpoint says it is healthy.

    Issues a GET to ``http://localhost:8001/health`` with a 5-second timeout.

    Returns:
        bool: True only when the response is HTTP 200 and its JSON body is an
        object whose ``"status"`` field equals ``"healthy"``. Connection
        errors, timeouts, non-200 statuses and malformed bodies all yield
        False, so the caller can simply keep polling.
    """
    try:
        response = requests.get("http://localhost:8001/health", timeout=5)
        if response.status_code != 200:
            return False
        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON on requests
        # releases whose decode error is not a RequestException subclass.
        return False

    # A service that is still starting may answer with something other than
    # a JSON object; treat that as "not ready" rather than crashing on .get().
    return isinstance(data, dict) and data.get("status") == "healthy"
| |
|
def test_chat_completion():
    """Send one chat-completion request to the local service and print the result.

    Posts a single user message to
    ``http://localhost:8001/v1/chat/completions`` with a 30-second timeout.

    Returns:
        bool: True when the service answers with HTTP 200 and the reply text
        can be extracted from ``choices[0].message.content``; False on any
        other status code, on a request-level error, or when the body does
        not have that structure.
    """
    payload = {
        "model": "unsloth/Mistral-Nemo-Instruct-2407",
        "messages": [
            {"role": "user", "content": "Hello! Please say 'Service is working correctly' if you can read this."}
        ],
        "max_tokens": 50,
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            "http://localhost:8001/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        print(f"❌ Chat completion error: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ Chat completion failed: {response.status_code} - {response.text}")
        return False

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 with a non-JSON or differently shaped body is a test failure,
        # not a reason to abort the whole monitor with a traceback.
        print(f"❌ Chat completion returned an unexpected response: {e!r}")
        return False

    print(f"✅ Chat completion successful: {content}")
    return True
| |
|
def monitor_service(*, max_checks: int = 300, check_interval: float = 5) -> bool:
    """Poll the service until it is healthy, then run the chat-completion test.

    Args:
        max_checks: Maximum number of health polls before giving up.
        check_interval: Seconds to sleep after each unsuccessful poll.

    Returns:
        bool: True when the health check passes and the chat-completion test
        succeeds. False when the chat-completion test fails, or when the
        service is still not healthy after ``max_checks`` polls.
    """
    print("🔍 Monitoring Mistral Nemo service startup...")
    print("📥 Waiting for model download and loading to complete...")

    for check_count in range(1, max_checks + 1):
        if check_service_health():
            print("\n🎉 Service is healthy! Running tests...")

            if test_chat_completion():
                print("\n✅ All tests passed! Mistral Nemo service is fully operational.")
                return True

            print("\n⚠️ Service health check passed but chat completion failed.")
            return False

        # Animated 0-3 dots, padded so a shorter run overwrites the previous one.
        dots = "." * (check_count % 4)
        # flush=True: the line has no newline, so a line-buffered stdout
        # would otherwise hold the progress text back.
        print(
            f"\r⏳ Waiting for service to be ready{dots:<3} ({check_count}/{max_checks})",
            end="",
            flush=True,
        )
        time.sleep(check_interval)

    print(f"\n❌ Service didn't become ready after {max_checks * check_interval} seconds")
    return False
| |
|
if __name__ == "__main__":
    # The process exit status mirrors the monitoring outcome:
    # 0 when monitor_service() reports success, 1 otherwise.
    exit_code = 0 if monitor_service() else 1
    sys.exit(exit_code)
| |
|