Spaces:
Sleeping
Sleeping
File size: 3,749 Bytes
874d8b7 92cc8c7 a627a88 92cc8c7 a627a88 874d8b7 92cc8c7 a627a88 53128f5 874d8b7 53128f5 1f4e090 874d8b7 90dc3ae 874d8b7 53128f5 874d8b7 90dc3ae 874d8b7 90dc3ae 874d8b7 53128f5 874d8b7 53128f5 a627a88 874d8b7 1f4e090 874d8b7 1f4e090 92cc8c7 a627a88 874d8b7 a627a88 92cc8c7 874d8b7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import json
import spaces
import subprocess
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Force KaTeX display-math blocks to render inline so LaTeX flows with the
# surrounding Markdown text (scoped to the #qwen-md output component).
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""

# Download the quantized GGUF weights from the Hugging Face Hub into
# ./models (a no-op on re-runs once the file is cached locally).
hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    local_dir="./models",
)

# Load the model once at import time; this single instance serves every request.
llm = Llama(
    model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    flash_attn=True,       # enable flash-attention kernels if the build supports them
    n_ctx=8192,            # context window, in tokens
    n_batch=1024,          # prompt-processing batch size
    chat_format="chatml",  # Qwen instruct models use the ChatML template
)
# Gradio components (created with render=False here; laid out later inside
# the Blocks context).
output_md = gr.Markdown(
    label="Answer",
    value="Answer will be presented here",
    # Delimiter pairs that trigger KaTeX rendering in the streamed answer.
    # BUGFIX: the originals were written as "\\begin\{equation\}" — "\{" is an
    # invalid escape in a plain string literal and leaves a literal backslash
    # before the brace, so those delimiters could never match model output.
    latex_delimiters=[
        {"left": "\\(", "right": "\\)", "display": True},
        {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
        {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
        {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
        {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
        {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    elem_id="qwen-md",          # hook for the inline-math CSS above
    show_copy_button=True,
    container=True,
    render=False,
)
# Input-side widgets. All are built with render=False and placed explicitly
# in the Blocks layout below.
target_lang = gr.Dropdown(
    label="Output Language",
    choices=["Chinese", "English"],
    value="Chinese",
    interactive=True,
    render=False,
)
new_tokens = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=8192,
    step=1,
    value=2048,
    render=False,
)
temperature = gr.Slider(
    label="Temperature",
    minimum=0,
    maximum=2.0,
    step=0.1,
    value=0.5,
    render=False,
)
top_p = gr.Slider(
    label="Top P",
    minimum=0.0,
    maximum=1.0,
    step=0.05,
    value=0.95,
    render=False,
)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
# Static header shown above the chat UI.
banner = gr.Markdown(
    value="""
# 📖 Qwen2.5-Math GGUF
This chatbot is based on the Qwen2.5-Math-7B-Instruct-GGUF model. You can ask it any math problem.
"""
)
# Gradio callback
def respond(
    input_text,
    lang="Chinese",
    max_tokens=300,
    temperature=0.5,
    top_p=0.9,
):
    """Stream a chat-completion answer for a math question.

    Args:
        input_text: The user's question.
        lang: "Chinese" selects a Chinese system prompt; any other value
            selects the English one.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling cutoff forwarded to the model.

    Yields:
        The answer accumulated so far; Gradio re-renders the output
        component on every yield, producing a typing effect.
    """
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_text},
    ]
    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    partial = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        # Some chunks (the role header and the finish chunk) carry no text.
        # ("content" present implies a non-empty delta, so the original's
        # extra len(...) != 0 check was redundant.)
        if "content" in delta:
            partial += delta["content"]
            yield partial
# Assemble the UI. Because every component was created with render=False,
# the Row/Column nesting here — not creation order — defines the layout.
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
    # Wire the button to the streaming generator; each yield from respond()
    # updates the Markdown output in place.
    submit_btn.click(
        fn=respond,
        inputs=[input_text, target_lang, new_tokens, temperature, top_p],
        outputs=output_md,
    )
    with gr.Column():
        banner.render()
        with gr.Row():
            with gr.Column():
                # Left column: question box, generation controls, submit.
                input_text.render()
                target_lang.render()
                new_tokens.render()
                temperature.render()
                top_p.render()
                submit_btn.render()
            with gr.Column():
                # Right column: the streamed, LaTeX-capable answer.
                output_md.render()
# Script entry point: start the Gradio server when run directly.
# BUGFIX: removed a stray trailing " |" (page-scrape artifact) after
# demo.launch(), which made the line a syntax error.
if __name__ == "__main__":
    demo.launch()