Spaces:
Running
on
Zero
Running
on
Zero
Update Gradio app with multiple files
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
| 3 |
import torch
|
| 4 |
import spaces
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
# Initialize the model and tokenizer
|
|
@@ -20,6 +21,19 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
| 20 |
print("Model loaded successfully!")
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@spaces.GPU
|
| 24 |
def respond(message, history):
|
| 25 |
"""
|
|
@@ -68,6 +82,9 @@ def respond(message, history):
|
|
| 68 |
# Decode - skip special tokens to filter out think tokens
|
| 69 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 70 |
|
|
|
|
|
|
|
|
|
|
| 71 |
# For streaming effect, yield character by character
|
| 72 |
partial_response = ""
|
| 73 |
for char in response:
|
|
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
| 3 |
import torch
|
| 4 |
import spaces
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
|
| 8 |
# Initialize the model and tokenizer
|
|
|
|
| 21 |
print("Model loaded successfully!")
|
| 22 |
|
| 23 |
|
| 24 |
+
def filter_think_tags(text):
    """
    Remove <think>...</think> tags and their content from the response.
    """
    # Remove <think>...</think> blocks including the tags.
    # DOTALL is required so the reasoning block may span multiple lines.
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    # Remove any remaining orphaned opening or closing think tags
    # (e.g. when generation was truncated mid-block).
    text = re.sub(r'</?think>', '', text)
    # Clean up extra whitespace left behind by the removed blocks
    text = re.sub(r'\n\s*\n', '\n', text).strip()
    return text
|
| 35 |
+
|
| 36 |
+
|
| 37 |
@spaces.GPU
|
| 38 |
def respond(message, history):
|
| 39 |
"""
|
|
|
|
| 82 |
# Decode - skip special tokens to filter out think tokens
|
| 83 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 84 |
|
| 85 |
+
# Filter out <think> tags and their content
|
| 86 |
+
response = filter_think_tags(response)
|
| 87 |
+
|
| 88 |
# For streaming effect, yield character by character
|
| 89 |
partial_response = ""
|
| 90 |
for char in response:
|