import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import spaces
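# `spaces` provides the @spaces.GPU decorator for ZeroGPU Spaces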

# Initialize the model and tokenizer
print("Loading VibeThinker model...")
model = AutoModelForCausalLM.from_pretrained(
    "WeiboAI/VibeThinker-1.5B",
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "WeiboAI/VibeThinker-1.5B",
    trust_remote_code=True
)
print("Model loaded successfully!")
@spaces.GPU
def respond(message, history):
    """
    Generate a streaming response for the chatbot.

    Args:
        message: The user's current message.
        history: Previous conversation turns as a list of
            {"role": ..., "content": ...} dicts.
    """
    # Copy the history so Gradio's state isn't mutated in place
    messages = list(history) if history else []
    # Add the current user message
    messages.append({"role": "user", "content": message})
    # Apply the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Tokenize and move inputs to the model's device
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Generation settings from the official usage example
    generation_config = dict(
        max_new_tokens=2048,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        top_k=None
    )
    # Generate, wrapping the parameters in a GenerationConfig
    generated_ids = model.generate(
        **model_inputs,
        generation_config=GenerationConfig(**generation_config)
    )
    # Trim the prompt tokens from the output, as in the official usage example
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    # Decode the completion, skipping special tokens
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # For a streaming effect, yield the response character by character
    partial_response = ""
    for char in response:
        partial_response += char
        yield partial_response
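    # Note: generation completes before any text is yielded, so this only
    # simulates streaming. True token-level streaming would need something
    # like transformers.TextIteratorStreamer driving generate() from a
    # background thread.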

# Create the Gradio interface
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="""
    .header-link { text-decoration: none; color: inherit; }
    .header-link:hover { text-decoration: underline; }
    """
) as demo:
    gr.Markdown(
        """
        # 💭 VibeThinker Chatbot
        Chat with [WeiboAI/VibeThinker-1.5B](https://huggingface.co/WeiboAI/VibeThinker-1.5B) - a powerful conversational AI model.
        <a href="https://huggingface.co/spaces/akhaliq/anycoder" class="header-link">Built with anycoder</a>
        """
    )
    gr.ChatInterface(
        fn=respond,
        type="messages",
        title="",
        description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
        examples=[
            "What is 2 + 2?",
            "Tell me a short joke",
            "What is the capital of France?",
            "Explain AI in one sentence",
        ],
        cache_examples=False,
        # Let the model's <think>...</think> tags render instead of being stripped
        chatbot=gr.Chatbot(allow_tags=["think"]),
    )
    gr.Markdown(
        """
        ### About VibeThinker
        VibeThinker is a 1.5B-parameter conversational AI model designed for engaging and thoughtful conversations.
        The model uses temperature sampling (0.6) for balanced creativity and coherence.
        **Powered by ZeroGPU** for efficient GPU resource allocation.
        """
    )

if __name__ == "__main__":
    demo.launch()