import gradio as gr import spaces import torch from threading import Thread from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer model_name = "entfane/math-virtuoso-7B" tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained( model_name, device_map="auto", ) model.eval() eos_token = tokenizer.eos_token_id @spaces.GPU() def chat_math_virtuoso(message: str, temperature: float = 0.7, max_new_tokens: int = 2048): """ Generate a streaming response with Math Virtuoso 7B. """ conversation = [{"role": "user", "content": message}] input_ids = tokenizer.apply_chat_template( conversation, add_generation_prompt=True, return_tensors="pt" ).to(model.device) streamer = TextIteratorStreamer( tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True ) generate_kwargs = dict( input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens, temperature=temperature, eos_token_id=eos_token, pad_token_id=eos_token ) thread = Thread(target=model.generate, kwargs=generate_kwargs) thread.start() outputs = [] for token in streamer: outputs.append(token) yield "".join(outputs) with gr.Blocks(css=""" body { background: linear-gradient(135deg, #f0f2ff, #d9e4ff); font-family: 'Helvetica Neue', Arial, sans-serif; } .gr-block { border-radius: 20px; box-shadow: 0 10px 30px rgba(0,0,0,0.12); padding: 20px; } .gr-button { background: linear-gradient(90deg, #6C63FF, #8B78FF); color: white; font-weight: bold; border-radius: 12px; padding: 10px 25px; transition: all 0.3s ease; } .gr-button:hover { transform: translateY(-2px); box-shadow: 0 6px 15px rgba(0,0,0,0.2); } .gr-textbox { border-radius: 12px; border: 1px solid #ccc; padding: 10px; transition: border-color 0.3s; } .gr-textbox:focus { border-color: #6C63FF; box-shadow: 0 0 8px rgba(108, 99, 255, 0.3); } .gr-slider .noUi-connect { background: #6C63FF; } h2 { text-align: center; color: #6C63FF; font-weight: 700; margin-bottom: 25px; } """) as demo: gr.Markdown("