import numpy as np
from huggingface_hub import snapshot_download
from PIL import Image


# Repository id passed to `snapshot_download` for the base model
# (stored under ./smol-vlm-model/base-model by download_pretrained_models).
base_model_path = "HuggingFaceTB/SmolVLM-Instruct"
# Repository id passed to `snapshot_download` for the second download
# (stored under ./smol-vlm-model/peft-model); presumably a PEFT adapter
# fine-tuned on top of the base model -- confirm against the loading code.
peft_model_path = "chuuhtetnaing/smolvlm-mmocr-sft-round-3"


def warmup_model():
    """
    Run a single throwaway recognition pass so the VLM is exercised up front.

    A synthetic detection entry is built around an image made from a
    (64, 128, 3) uint8 array filled with 255 and handed to
    ``ocr.recognition.inference``; the result is discarded.

    Any ``Exception`` raised by the inference call is caught and reported as
    a printed warning, so a failed warmup does not abort the caller. The
    ``ocr.recognition`` import happens outside that guard, so an import
    failure still propagates.
    """
    # Imported lazily, inside the function, rather than at module import time.
    from ocr import recognition

    print("Warming up VLM model...")

    blank_pixels = np.full((64, 128, 3), 255, dtype=np.uint8)
    placeholder_crop = Image.fromarray(blank_pixels)
    placeholder_entry = {
        "box": [10, 10, 50, 30],
        "crop": placeholder_crop,
        "line_no": 1,
    }

    try:
        # Only the side effect of running inference matters; output is ignored.
        recognition.inference([placeholder_entry])
        print("VLM model warmed up successfully!")
    except Exception as err:
        print(f"Warning: Model warmup failed: {err}")


def download_pretrained_models():
    """
    Fetch both model repositories into fixed local directories.

    Calls ``snapshot_download`` once for ``base_model_path`` (into
    ``./smol-vlm-model/base-model``) and once for ``peft_model_path`` (into
    ``./smol-vlm-model/peft-model``), in that order. Returns ``None``.
    """
    download_targets = (
        (base_model_path, "./smol-vlm-model/base-model"),
        (peft_model_path, "./smol-vlm-model/peft-model"),
    )
    for repo, destination in download_targets:
        snapshot_download(repo_id=repo, local_dir=destination)