File size: 3,159 Bytes
c89af74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03cff4
7cb952c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03cff4
c89af74
7cb952c
d03cff4
 
 
 
 
 
 
 
 
c89af74
d03cff4
c89af74
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr
import spaces
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Hugging Face Hub id of the fine-tuned math chat model.
model_name = "entfane/math-virtuoso-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # let accelerate place layers on the available device(s)
)
model.eval()  # inference only: disables dropout / training-mode layers

# Token id used both to stop generation and as the pad id below
# (this model/tokenizer presumably has no dedicated pad token — see generate_kwargs).
eos_token = tokenizer.eos_token_id

@spaces.GPU()
def chat_math_virtuoso(message: str, temperature: float = 0.7, max_new_tokens: int = 2048):
    """
    Generate a streaming response with Math Virtuoso 7B.

    Args:
        message: The user's question (wrapped in a single-turn chat template).
        temperature: Sampling temperature; 0 selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The accumulated response text after each newly streamed token,
        so a Gradio Textbox output updates incrementally.
    """
    conversation = [{"role": "user", "content": message}]

    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,  # max seconds to wait for the next token before raising
        skip_prompt=True,
        skip_special_tokens=True
    )

    # BUG FIX: `temperature` was previously passed without enabling sampling,
    # and HF `generate` defaults to greedy search, so the temperature setting
    # was silently ignored. Enable sampling for temperature > 0; a value of 0
    # falls back to greedy decoding (temperature must be > 0 when sampling).
    do_sample = temperature > 0
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        eos_token_id=eos_token,
        pad_token_id=eos_token,  # reuse EOS as pad (no dedicated pad token)
    )
    if do_sample:
        generate_kwargs["temperature"] = temperature

    # Run generation in a background thread; this generator streams from it.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    outputs = []
    for token in streamer:
        outputs.append(token)
        yield "".join(outputs)

    # Streamer is exhausted once generate() finishes; reap the worker thread.
    thread.join()

# --- Gradio UI: two-column chat layout with a custom CSS theme ---------------
with gr.Blocks(css="""
    body {
        background: linear-gradient(135deg, #f0f2ff, #d9e4ff);
        font-family: 'Helvetica Neue', Arial, sans-serif;
    }
    .gr-block {
        border-radius: 20px;
        box-shadow: 0 10px 30px rgba(0,0,0,0.12);
        padding: 20px;
    }
    .gr-button {
        background: linear-gradient(90deg, #6C63FF, #8B78FF);
        color: white;
        font-weight: bold;
        border-radius: 12px;
        padding: 10px 25px;
        transition: all 0.3s ease;
    }
    .gr-button:hover {
        transform: translateY(-2px);
        box-shadow: 0 6px 15px rgba(0,0,0,0.2);
    }
    .gr-textbox {
        border-radius: 12px;
        border: 1px solid #ccc;
        padding: 10px;
        transition: border-color 0.3s;
    }
    .gr-textbox:focus {
        border-color: #6C63FF;
        box-shadow: 0 0 8px rgba(108, 99, 255, 0.3);
    }
    .gr-slider .noUi-connect {
        background: #6C63FF;
    }
    h2 {
        text-align: center;
        color: #6C63FF;
        font-weight: 700;
        margin-bottom: 25px;
    }
""") as demo:

    gr.Markdown("<h2>🤖 Math Virtuoso 7B Chat</h2>")
    
    with gr.Row():
        # Left column: question input and generation controls.
        with gr.Column(scale=2):
            msg = gr.Textbox(label="Your question", placeholder="Type a math question...", lines=2)
            temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temperature")
            max_tokens = gr.Slider(50, 2048, value=512, step=50, label="Max new tokens")
            submit_btn = gr.Button("Ask")
        # Right column: streaming model output (read-only).
        with gr.Column(scale=3):
            output = gr.Textbox(label="Model reply", lines=15, interactive=False)

    # chat_math_virtuoso is a generator, so Gradio updates the output
    # textbox incrementally as each partial response is yielded.
    submit_btn.click(
        fn=chat_math_virtuoso,
        inputs=[msg, temperature, max_tokens],
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()