import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# This script is run during the Docker build process to pre-download models.

GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
BASE_MODEL_PATH = "unsloth/gemma-2b-it"
# Points to the fine-tuned grammar LoRA adapter on the Hugging Face Hub.
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"

hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
if not hf_token:
    raise ValueError("HUGGING_FACE_HUB_TOKEN environment variable is required to download models.")
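# In a Docker build the token is usually supplied via a build secret or build
# argument rather than being baked into the image.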

print("--- Starting Model Pre-downloading ---")

# 1. Download Gender Model
print(f"Downloading: {GENDER_MODEL_PATH}")
AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
print("βœ… Gender model downloaded.")

# 2. Download Grammar Base Model
print(f"Downloading base model: {BASE_MODEL_PATH}")
# We need to load the base model into memory to attach the adapter to it for caching.
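# Loading in float32 keeps this step CPU-friendly inside the build container;
# only the files written to the Hugging Face cache matter, not this in-memory copy.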
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    token=hf_token,
    torch_dtype=torch.float32,
)
AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
print("βœ… Base model downloaded.")

# 3. Download Your Fine-Tuned LoRA Adapter
print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
# This step downloads your private adapter and links it to the base model, caching it.
PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
print("βœ… LoRA adapter downloaded.")


print("--- Model Pre-downloading Complete ---")