CedricZ committed on
Commit
66fe08f
·
1 Parent(s): 19df4a4
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import os
3
  import requests
 
4
 
5
  API_KEY = os.getenv("access_token")
6
 
@@ -24,13 +25,24 @@ def respond(
24
  "top_p": top_p
25
  }
26
  )
27
- return response.json()["choices"][0]["text"]
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
  demo = gr.Interface(
31
  fn=respond,
32
  title="Text Generation Playground",
33
- description="""Model: meta/Meta-Llama-3.1-8B, Max input length: 1256 characters\n
34
  Prompts longer than 1256 characters will be truncated, which may lead to incomplete or unexpected responses.\n
35
  Try again or refresh the page if you encounter an error.""",
36
  inputs=[
@@ -47,7 +59,7 @@ E.g. # System-level prompt: You are a helpful assistant...\n
47
 
48
  # **Due to the limitations of the service, keep your prompt under 1256 characters.**""",
49
  ),
50
- gr.Slider(minimum=1, maximum=512, value=150, step=1, label="Max new tokens"),
51
  gr.Slider(minimum=0, maximum=2.0, value=1, step=0.01, label="Temperature"),
52
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,label="Top-p (nucleus sampling)"),
53
  ],
 
1
  import gradio as gr
2
  import os
3
  import requests
4
+ import json
5
 
6
  API_KEY = os.getenv("access_token")
7
 
 
25
  "top_p": top_p
26
  }
27
  )
28
+ try:
29
+ output = response.json()["choices"][0]["text"]
30
+ except requests.exceptions.JSONDecodeError as e:
31
+ cleaned_output = response.text.strip().rstrip('%')
32
+ output = json.loads(cleaned_output)
33
+
34
+ if output['error']['code'] == 'concurrency_limit_exceeded':
35
+ return "Error: Concurrency limit exceeded. Try submit your request again in a few seconds."
36
+ elif 'error' in output:
37
+ return f"Error: {output['error']['message']}. Refresh the page and try again later, post the error on OLAT forum if your issue persists."
38
+ else:
39
+ return output
40
 
41
 
42
  demo = gr.Interface(
43
  fn=respond,
44
  title="Text Generation Playground",
45
+ description="""Model: Qwen/Qwen3-8B, Max input length: 1256 characters\n
46
  Prompts longer than 1256 characters will be truncated, which may lead to incomplete or unexpected responses.\n
47
  Try again or refresh the page if you encounter an error.""",
48
  inputs=[
 
59
 
60
  # **Due to the limitations of the service, keep your prompt under 1256 characters.**""",
61
  ),
62
+ gr.Slider(minimum=1, maximum=300, value=150, step=1, label="Max new tokens"),
63
  gr.Slider(minimum=0, maximum=2.0, value=1, step=0.01, label="Temperature"),
64
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,label="Top-p (nucleus sampling)"),
65
  ],