CedricZ committed on
Commit
66fe08f
·
1 Parent(s): 19df4a4
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import os
3
  import requests
 
4
 
5
  API_KEY = os.getenv("access_token")
6
 
@@ -24,13 +25,24 @@ def respond(
24
  "top_p": top_p
25
  }
26
  )
27
- return response.json()["choices"][0]["text"]
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
  demo = gr.Interface(
31
  fn=respond,
32
  title="Text Generation Playground",
33
- description="""Model: meta/Meta-Llama-3.1-8B, Max input length: 1256 characters\n
34
  Prompts longer than 1256 characters will be truncated, which may lead to incomplete or unexpected responses.\n
35
  Try again or refresh the page if you encounter an error.""",
36
  inputs=[
@@ -47,7 +59,7 @@ E.g. # System-level prompt: You are a helpful assistant...\n
47
 
48
  # **Due to the limitations of the service, keep your prompt under 1256 characters.**""",
49
  ),
50
- gr.Slider(minimum=1, maximum=512, value=150, step=1, label="Max new tokens"),
51
  gr.Slider(minimum=0, maximum=2.0, value=1, step=0.01, label="Temperature"),
52
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,label="Top-p (nucleus sampling)"),
53
  ],
 
1
  import gradio as gr
2
  import os
3
  import requests
4
+ import json
5
 
6
  API_KEY = os.getenv("access_token")
7
 
 
25
  "top_p": top_p
26
  }
27
  )
28
+ try:
29
+ output = response.json()["choices"][0]["text"]
30
+ except requests.exceptions.JSONDecodeError as e:
31
+ cleaned_output = response.text.strip().rstrip('%')
32
+ output = json.loads(cleaned_output)
33
+
34
+ if output['error']['code'] == 'concurrency_limit_exceeded':
35
+ return "Error: Concurrency limit exceeded. Try submit your request again in a few seconds."
36
+ elif 'error' in output:
37
+ return f"Error: {output['error']['message']}. Refresh the page and try again later, post the error on OLAT forum if your issue persists."
38
+ else:
39
+ return output
40
 
41
 
42
  demo = gr.Interface(
43
  fn=respond,
44
  title="Text Generation Playground",
45
+ description="""Model: Qwen/Qwen3-8B, Max input length: 1256 characters\n
46
  Prompts longer than 1256 characters will be truncated, which may lead to incomplete or unexpected responses.\n
47
  Try again or refresh the page if you encounter an error.""",
48
  inputs=[
 
59
 
60
  # **Due to the limitations of the service, keep your prompt under 1256 characters.**""",
61
  ),
62
+ gr.Slider(minimum=1, maximum=300, value=150, step=1, label="Max new tokens"),
63
  gr.Slider(minimum=0, maximum=2.0, value=1, step=0.01, label="Temperature"),
64
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,label="Top-p (nucleus sampling)"),
65
  ],