skullcandy42 commited on
Commit
1f4e090
·
verified ·
1 Parent(s): 321d6f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -56
app.py CHANGED
@@ -1,7 +1,5 @@
1
- import json
2
- import spaces
3
- import subprocess
4
  import gradio as gr
 
5
  from llama_cpp import Llama
6
  from huggingface_hub import hf_hub_download
7
 
@@ -17,25 +15,46 @@ hf_hub_download(
17
  local_dir="./models",
18
  )
19
 
20
- llm = Llama(
21
- model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
22
- flash_attn=True,
23
- n_ctx=8192,
24
- n_batch=1024,
25
- chat_format="chatml",
26
- )
 
 
 
 
 
 
 
 
27
 
28
- # Gradio 组件
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  output_md = gr.Markdown(
30
  label="Answer",
31
  value="Answer will be presented here",
32
  latex_delimiters=[
33
  {"left": "\\(", "right": "\\)", "display": True},
34
- {"left": "\\begin\{equation\}", "right": "\\end\{equation\}", "display": True},
35
- {"left": "\\begin\{align\}", "right": "\\end\{align\}", "display": True},
36
- {"left": "\\begin\{alignat\}", "right": "\\end\{alignat\}", "display": True},
37
- {"left": "\\begin\{gather\}", "right": "\\end\{gather\}", "display": True},
38
- {"left": "\\begin\{CD\}", "right": "\\end\{CD\}", "display": True},
39
  {"left": "\\[", "right": "\\]", "display": True},
40
  ],
41
  elem_id="qwen-md",
@@ -43,6 +62,7 @@ output_md = gr.Markdown(
43
  container=True,
44
  render=False,
45
  )
 
46
  target_lang = gr.Dropdown(
47
  choices=["Chinese", "English"],
48
  value="Chinese",
@@ -50,61 +70,45 @@ target_lang = gr.Dropdown(
50
  interactive=True,
51
  render=False,
52
  )
 
53
  new_tokens = gr.Slider(
54
- minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens", render=False
55
  )
 
56
  temperature = gr.Slider(
57
- minimum=0, maximum=2.0, value=0.5, step=0.1, label="Temperature", render=False
58
  )
59
- top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top P", render=False)
 
 
 
 
60
  input_text = gr.Textbox(label="Ask math questions here", render=False)
61
  submit_btn = gr.Button(value="Ask", render=False)
 
62
  banner = gr.Markdown(value="""
63
  # 📖 Qwen2.5-Math GGUF
64
- This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
65
- """
66
- )
67
-
68
 
69
- # Gradio 函数
70
- def respond(
71
- input_text,
72
- lang="Chinese",
73
- max_tokens=2048,
74
- temperature=0.5,
75
- top_p=0.95,
76
- ):
77
- if lang == "Chinese":
78
- sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
79
- else:
80
- sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
81
  messages = [
82
- {
83
- "role": "system",
84
- "content": sys_msg,
85
- },
86
  {"role": "user", "content": input_text},
87
  ]
88
 
89
- response = ""
90
- response = llm.create_chat_completion(
91
- messages=messages,
92
- stream=True,
93
- max_tokens=max_tokens,
94
- temperature=temperature,
95
- top_p=top_p,
96
- )
97
- message_repl = ""
98
- for chunk in response:
99
- if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
100
- message_repl = message_repl + \
101
- chunk['choices'][0]["delta"]["content"]
102
- yield message_repl
103
-
104
 
105
  with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
106
  submit_btn.click(
107
- fn=respond,
108
  inputs=[input_text, target_lang, new_tokens, temperature, top_p],
109
  outputs=output_md,
110
  )
@@ -122,4 +126,4 @@ with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
122
  output_md.render()
123
 
124
  if __name__ == "__main__":
125
- demo.launch()
 
 
 
 
1
  import gradio as gr
2
+ import spaces
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
 
 
15
  local_dir="./models",
16
  )
17
 
18
# Model loading is explicitly wrapped in the GPU context.
# NOTE(review): @spaces.GPU functions are normally invoked per-request on
# ZeroGPU Spaces; calling load_llm() at import time may execute outside a
# GPU allocation — confirm this actually gets a device on the target Space.
@spaces.GPU
def load_llm():
    """Instantiate the llama.cpp model from the local GGUF file.

    Returns:
        Llama: a ready-to-use llama_cpp.Llama instance using the ChatML
        chat template.
    """
    llm = Llama(
        model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
        flash_attn=True,
        n_ctx=4096,        # context window (tokens)
        n_batch=512,       # prompt-processing batch size
        chat_format="chatml",
        n_gpu_layers=-1,   # -1 = offload all layers, per llama-cpp-python convention
    )
    return llm

# Load the model once at module import so every request reuses the same
# instance (original comment: load globally for speed).
llm = load_llm()
33
 
34
# Inference also runs inside the ZeroGPU context.
@spaces.GPU
def generate_response(messages, max_tokens, temperature, top_p):
    """Stream a chat completion from the local model.

    Yields the accumulated answer text after each content chunk so the
    caller (a Gradio streaming handler) can update the UI incrementally.
    """
    completion_stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    pieces = []
    for event in completion_stream:
        delta = event["choices"][0]["delta"]
        if "content" in delta:
            pieces.append(delta["content"])
            yield "".join(pieces)
51
+
52
+ # Gradio component setup
53
  output_md = gr.Markdown(
54
  label="Answer",
55
  value="Answer will be presented here",
56
  latex_delimiters=[
57
  {"left": "\\(", "right": "\\)", "display": True},
 
 
 
 
 
58
  {"left": "\\[", "right": "\\]", "display": True},
59
  ],
60
  elem_id="qwen-md",
 
62
  container=True,
63
  render=False,
64
  )
65
+
66
  target_lang = gr.Dropdown(
67
  choices=["Chinese", "English"],
68
  value="Chinese",
 
70
  interactive=True,
71
  render=False,
72
  )
73
+
74
# Upper bound on the number of tokens generated per answer.
new_tokens = gr.Slider(
    minimum=1,
    maximum=1024,
    value=128,
    step=1,
    label="Max new tokens",
    render=False,
)
77
+
78
# Sampling temperature; low default keeps math answers deterministic.
temperature = gr.Slider(
    minimum=0,
    maximum=1.0,
    value=0.2,
    step=0.1,
    label="Temperature",
    render=False,
)
81
+
82
# Nucleus-sampling cutoff.
top_p = gr.Slider(
    minimum=0.0,
    maximum=1.0,
    value=0.8,
    step=0.05,
    label="Top P",
    render=False,
)
85
+
86
# Free-form question input and the button that triggers generation
# (render=False: placement happens inside the Blocks layout below).
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
88
+
89
# Static Markdown header shown above the UI.
banner = gr.Markdown(value="""
# 📖 Qwen2.5-Math GGUF
Fast mathematical reasoning with ZeroGPU optimized.
""")
 
 
93
 
94
# Gradio callback: assembles the chat prompt and streams the GPU reply.
def respond(input_text, lang, max_tokens, temperature, top_p):
    """Build system/user messages for the chosen answer language and
    relay the model's streamed output to the UI."""
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."

    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_text},
    ]

    # Delegate to the GPU-decorated generator and re-yield its chunks.
    yield from generate_response(messages, max_tokens, temperature, top_p)
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
110
  submit_btn.click(
111
+ respond,
112
  inputs=[input_text, target_lang, new_tokens, temperature, top_p],
113
  outputs=output_md,
114
  )
 
126
  output_md.render()
127
 
128
  if __name__ == "__main__":
129
+ demo.launch()