# Dc-4nderson's picture
# Update app.py
# 1f03555 verified
# The Hugging Face cache locations have to be exported BEFORE transformers /
# huggingface_hub (and peft, which imports them) are imported: those libraries
# resolve their cache directories from the environment at import time, so
# assigning the variables afterwards does not redirect the cache.
import os

os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf"
os.environ["HF_HOME"] = "/tmp/hf"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/hf"

# Third-party imports deliberately follow the environment setup above.
import torch  # noqa: E402
from fastapi import FastAPI  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer  # noqa: E402
from peft import PeftModel  # noqa: E402
# Hub identifiers: the base checkpoint and the LoRA adapter applied on top.
BASE = "mistralai/Mistral-7B-Instruct-v0.2"
ADAPTER = "Dc-4nderson/transcript_summarizer_model"

# ASGI application object; routes are registered on it further down.
app = FastAPI()

# Everything below runs once, at import time, so the first request does not
# pay the model-loading cost.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE)

print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE,
    torch_dtype=torch.float16,
    device_map="auto",
)

print("Loading and merging LoRA...")
# Wrap the base model with the adapter, then merge the adapter into it and
# keep the resulting model under the same module-level name.
model = PeftModel.from_pretrained(model, ADAPTER).merge_and_unload()
model.eval()
class SummarizeRequest(BaseModel):
    """Request body accepted by the ``/summarize`` endpoint."""

    # Raw transcript text; the endpoint appends it verbatim to its
    # instruction prompt.
    transcript: str
@app.post("/summarize")
def summarize(req: SummarizeRequest):
    """Return chunked transcript summary using your LoRA model.

    The transcript is appended to a fixed instruction, the prompt is run
    through the module-level ``model`` with greedy decoding, and only the
    text generated after the prompt is returned.

    Args:
        req: Request body carrying the raw transcript text.

    Returns:
        A dict of the form ``{"summary": <generated text>}``.
    """
    instruction = (
        "Break this transcript into sections when topics change.\n"
        "Use -- between sections.\nTranscript:\n"
    )
    prompt = instruction + req.transcript

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # NOTE(review): 30000 new tokens on top of a long transcript may exceed
    # the model's context window and can make a single request run for a very
    # long time -- confirm this budget is intentional.
    output = model.generate(
        **inputs,
        max_new_tokens=30000,
        do_sample=False,
        use_cache=True,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # newly generated ones. Drop the prompt by token count instead of
    # string-replacing it in the decoded text: decoding does not always
    # reproduce the prompt character-for-character (so it could leak into the
    # summary), and str.replace would also delete any later repetition of it.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = output[0][prompt_len:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    return {"summary": text}