File size: 3,749 Bytes
874d8b7
 
 
92cc8c7
a627a88
 
92cc8c7
a627a88
 
874d8b7
 
92cc8c7
a627a88
 
 
 
 
 
 
53128f5
 
874d8b7
 
 
53128f5
 
1f4e090
874d8b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90dc3ae
874d8b7
 
 
53128f5
874d8b7
 
 
 
90dc3ae
874d8b7
90dc3ae
874d8b7
 
 
 
 
53128f5
874d8b7
 
 
 
53128f5
 
a627a88
874d8b7
 
1f4e090
 
 
 
 
 
874d8b7
 
 
 
 
 
1f4e090
92cc8c7
a627a88
 
874d8b7
a627a88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92cc8c7
 
874d8b7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
import spaces
import subprocess
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Inline-display overrides for KaTeX so rendered formulas flow with the
# surrounding text instead of each occupying a centered block line.
# Scoped to the answer Markdown component via its elem_id ("qwen-md").
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""

# Fetch the quantized GGUF weights from the Hugging Face Hub into ./models.
# (hf_hub_download is a no-op re-download if the file is already cached.)
hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    local_dir="./models",
)

# Load the model once at module import; `respond` below streams from it.
llm = Llama(
    model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    flash_attn=True,  # enable flash-attention kernels
    n_ctx=8192,  # context window in tokens
    n_batch=1024,  # prompt-processing batch size
    chat_format="chatml",  # ChatML prompt template (Qwen chat format)
)

# Gradio components (created with render=False; placed in the layout below).
output_md = gr.Markdown(
    label="Answer",
    value="Answer will be presented here",
    # Delimiter pairs that trigger KaTeX rendering in the streamed answer.
    # FIX: the original wrote "\\begin\{equation\}" — "\{" is an invalid
    # escape sequence (SyntaxWarning on Python 3.12+) and yields the literal
    # delimiter "\begin\{equation\}", which never matches real LaTeX output.
    # The braces need no escaping inside the string.
    latex_delimiters=[
        {"left": "\\(", "right": "\\)", "display": True},
        {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
        {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
        {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
        {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
        {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    elem_id="qwen-md",  # target of the CSS overrides above
    show_copy_button=True,
    container=True,
    render=False,
)
# Control widgets, all render=False so they can be positioned explicitly
# inside the Blocks layout at the bottom of the file.
target_lang = gr.Dropdown(
    choices=["Chinese", "English"],
    value="Chinese",
    label="Output Language",
    interactive=True,
    render=False,
)
new_tokens = gr.Slider(
    minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens", render=False
)
temperature = gr.Slider(
    minimum=0, maximum=2.0, value=0.5, step=0.1, label="Temperature", render=False
)
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top P", render=False)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
banner = gr.Markdown(value="""
# 📖 Qwen2.5-Math GGUF
This chatbot is based on the Qwen2.5-Math-7B-Instruct-GGUF model. You can ask it any math problem.
"""
)


# Gradio callback
def respond(
    input_text,
    lang="Chinese",
    max_tokens=300,
    temperature=0.5,
    top_p=0.9,
):
    """Stream a chat completion for a math question.

    Args:
        input_text: The user's question.
        lang: "Chinese" selects a Chinese system prompt; any other value
            falls through to the English one.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated answer text after each streamed chunk, so the
        bound Gradio Markdown component updates progressively.
    """
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
    messages = [
        {
            "role": "system",
            "content": sys_msg,
        },
        {"role": "user", "content": input_text},
    ]

    # NOTE: the original assigned `response = ""` and immediately overwrote
    # it — the dead assignment is removed here.
    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    accumulated = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        # Some stream chunks carry no text (e.g. role-only or final chunk);
        # the membership test alone covers the original's redundant
        # `len(delta) != 0 and ...` check.
        if "content" in delta:
            accumulated += delta["content"]
        yield accumulated


# Assemble the UI: register the click handler, then render the pre-built
# (render=False) components in their intended positions.
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
    # Stream respond()'s partial outputs into the answer panel.
    submit_btn.click(
        fn=respond,
        inputs=[input_text, target_lang, new_tokens, temperature, top_p],
        outputs=output_md,
    )
    with gr.Column():
        banner.render()
        with gr.Row():
            # Left column: question box and generation controls.
            with gr.Column():
                input_text.render()
                target_lang.render()
                new_tokens.render()
                temperature.render()
                top_p.render()
                submit_btn.render()
            # Right column: streamed, LaTeX-rendered answer.
            with gr.Column():
                output_md.render()

if __name__ == "__main__":
    demo.launch()