Spaces:
Running
on
Zero
Running
on
Zero
Update Gradio app with multiple files
Browse files
app.py
CHANGED
|
@@ -21,19 +21,6 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
| 21 |
print("Model loaded successfully!")
|
| 22 |
|
| 23 |
|
| 24 |
-
def filter_think_tags(text):
|
| 25 |
-
"""
|
| 26 |
-
Remove <think>...</think> tags and their content from the response.
|
| 27 |
-
"""
|
| 28 |
-
# Remove <think>...</think> blocks including the tags
|
| 29 |
-
text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
|
| 30 |
-
# Remove any remaining opening or closing think tags
|
| 31 |
-
text = re.sub(r'</?think>', '', text)
|
| 32 |
-
# Clean up extra whitespace
|
| 33 |
-
text = re.sub(r'\n\s*\n', '\n', text).strip()
|
| 34 |
-
return text
|
| 35 |
-
|
| 36 |
-
|
| 37 |
@spaces.GPU
|
| 38 |
def respond(message, history):
|
| 39 |
"""
|
|
@@ -79,12 +66,9 @@ def respond(message, history):
|
|
| 79 |
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 80 |
]
|
| 81 |
|
| 82 |
-
# Decode - skip special tokens
|
| 83 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 84 |
|
| 85 |
-
# Filter out <think> tags and their content
|
| 86 |
-
response = filter_think_tags(response)
|
| 87 |
-
|
| 88 |
# For streaming effect, yield character by character
|
| 89 |
partial_response = ""
|
| 90 |
for char in response:
|
|
@@ -121,6 +105,7 @@ with gr.Blocks(
|
|
| 121 |
"Explain AI in one sentence",
|
| 122 |
],
|
| 123 |
cache_examples=False,
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
gr.Markdown(
|
|
|
|
| 21 |
print("Model loaded successfully!")
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
@spaces.GPU
|
| 25 |
def respond(message, history):
|
| 26 |
"""
|
|
|
|
| 66 |
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 67 |
]
|
| 68 |
|
| 69 |
+
# Decode - skip special tokens will help but we'll also filter manually
|
| 70 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 71 |
|
|
|
|
|
|
|
|
|
|
| 72 |
# For streaming effect, yield character by character
|
| 73 |
partial_response = ""
|
| 74 |
for char in response:
|
|
|
|
| 105 |
"Explain AI in one sentence",
|
| 106 |
],
|
| 107 |
cache_examples=False,
|
| 108 |
+
chatbot=gr.Chatbot(allow_tags=["think"]),
|
| 109 |
)
|
| 110 |
|
| 111 |
gr.Markdown(
|