# MiniCrit_demo / app.py
# Author: wmaousley — "Update app.py" (commit c7d81ab, verified)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
import threading
MODEL = "wmaousley/MiniCrit-1.5B"

# Load tokenizer and model once at module import (HF Space startup).
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    # float16 is not properly supported on CPU: ops fall back to slow
    # emulation and generation is NaN-prone. Since device_map is "cpu",
    # load the weights in float32.
    torch_dtype=torch.float32,
    device_map="cpu",
)
def generate_stream(prompt):
    """Yield decoded text chunks for *prompt* as the model produces them.

    Runs ``model.generate`` on a background thread and streams the
    output through a ``TextIteratorStreamer``.

    Args:
        prompt: Full conversation text to continue.

    Yields:
        str: Incremental pieces of the newly generated reply.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True is essential here: without it the streamer first
    # echoes the entire prompt (the whole chat history) back to the
    # caller before any newly generated text.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        streamer=streamer,
    )
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    try:
        for new_text in streamer:
            yield new_text
    finally:
        # Ensure the generation thread is reaped even if the consumer
        # abandons this generator early.
        thread.join()
def chat_fn(message, history):
    """Stream a model reply, yielding the updated chat history.

    Args:
        message: The user's new message.
        history: Existing conversation as a list of (user, bot) pairs
            (the ``gr.Chatbot`` tuples format).

    Yields:
        list: ``history`` plus the in-progress ``[message, reply]`` pair.
        The output of this function is wired directly to a ``gr.Chatbot``,
        which expects the full pair list — yielding a bare reply string
        (as the previous version did) breaks the chat display.
    """
    # Flatten history into the plain-text prompt format the model expects.
    conversation = ""
    for user, bot in history:
        conversation += f"User: {user}\nMiniCrit: {bot}\n"
    conversation += f"User: {message}\nMiniCrit:"

    # Accumulate streamed tokens, re-yielding the whole history each time
    # so the Chatbot renders the reply incrementally.
    reply = ""
    for token in generate_stream(conversation):
        reply += token
        yield history + [[message, reply]]
# -------- UI --------
# Build the Blocks layout: header banner, chat window, message row.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Header banner. NOTE: the rocket emoji was previously mojibake
    # ("πŸš€", UTF-8 bytes decoded as cp1252); fixed to the intended 🚀.
    gr.Markdown(
        """
        <h1 style='text-align:center; color:#00eaff;'>
        MiniCrit-1.5B Chat UI 🚀
        </h1>
        <p style='text-align:center; color:gray;'>Enhanced Streaming Interface</p>
        """
    )

    chatbox = gr.Chatbot(
        label="MiniCrit-1.5B",
        height=500,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask something...",
            label="Message",
            scale=10,
        )
        send = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")

    # Stream chat_fn's yields into the Chatbot; a second handler on the
    # same click clears the textbox (input values are captured at event
    # time, so chat_fn still receives the typed message).
    send.click(chat_fn, [msg, chatbox], chatbox)
    send.click(lambda: "", None, msg)
    clear.click(lambda: [], None, chatbox)

demo.launch(debug=True)