"""Gradio chat demo for WeiboAI/VibeThinker-1.5B."""
from threading import Thread

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Model config
MODEL_ID = "WeiboAI/VibeThinker-1.5B"
SYSTEM_PROMPT = "You are a concise solver. Respond briefly."
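
# Any HF causal LM that ships a chat template should drop in here by changing
# MODEL_ID; the system prompt is prepended to every conversation below to
# keep answers short.
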
# Load model
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)
print("Model loaded!")

@spaces.GPU
def chat_with_stream(message, history, progress=gr.Progress()):
    """Chat with streaming output: yields the response as it is generated."""
    # Handle inputs safely
    if message is None:
        message = "Hello"
    if history is None:
        history = []
    # Convert to string
    message = str(message)

    progress(0.1, desc="Building conversation...")
    # Build the message list, starting with the system prompt
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Add prior turns. This assumes tuple-style history ([user, assistant]
    # pairs); a messages-style history (list of role/content dicts, the
    # Gradio 5 default) would need a different loop.
    for user_msg, assistant_msg in history:
        if user_msg is not None:
            messages.append({"role": "user", "content": str(user_msg)})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": str(assistant_msg)})

    progress(0.3, desc="Adding your message...")
    messages.append({"role": "user", "content": message})

    progress(0.5, desc="Formatting input...")
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
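    # `prompt` is now one flat string carrying the model's own role markers
    # (for ChatML-style templates, <|im_start|>/<|im_end|> tags); the exact
    # layout depends on the tokenizer's chat template.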
    progress(0.6, desc="Tokenizing...")
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    progress(0.7, desc="Starting generation...")
    # Stream tokens as they are produced. skip_prompt drops the echoed
    # input, so there is no need to split the decoded text afterwards.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() blocks until finished, so run it on a background thread
    # and consume the streamer here (generate already disables gradients).
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response  # ChatInterface renders the growing reply
    thread.join()

    progress(1.0, desc="Complete!")
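
# Quick sanity check when running locally (hypothetical; needs a GPU):
#     for partial in chat_with_stream("2+2", []):
#         pass
#     print(partial)  # the final streamed answer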

def create_demo():
    """Create the chat UI; a generator fn makes ChatInterface stream."""
    demo = gr.ChatInterface(
        fn=chat_with_stream,
        title="VibeThinker Chat",
        description="Simple chat with VibeThinker-1.5B",
        examples=["2+2", "What is AI?", "Write a poem"],
    )
    return demo

if __name__ == "__main__":
    print("Starting...")
    demo = create_demo()
    demo.launch(share=False)
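    # When hosting outside HF Spaces, standard launch args such as
    # demo.launch(server_name="0.0.0.0", server_port=7860) make the app
    # reachable from other machines.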