Spaces:
Running
on
Zero
Running
on
Zero
Update Gradio app with multiple files
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
| 3 |
import torch
|
| 4 |
import spaces
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
# Initialize the model and tokenizer
|
|
@@ -20,6 +21,19 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
| 20 |
print("Model loaded successfully!")
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@spaces.GPU
|
| 24 |
def respond(message, history):
|
| 25 |
"""
|
|
@@ -68,6 +82,9 @@ def respond(message, history):
|
|
| 68 |
# Decode - skip special tokens to filter out think tokens
|
| 69 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 70 |
|
|
|
|
|
|
|
|
|
|
| 71 |
# For streaming effect, yield character by character
|
| 72 |
partial_response = ""
|
| 73 |
for char in response:
|
|
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
| 3 |
import torch
|
| 4 |
import spaces
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
|
| 8 |
# Initialize the model and tokenizer
|
|
|
|
| 21 |
print("Model loaded successfully!")
|
| 22 |
|
| 23 |
|
| 24 |
+
def filter_think_tags(text):
    """
    Remove <think>...</think> tags and their content from the response.
    """
    # Remove <think>...</think> blocks including the tags.
    # DOTALL is required so the reasoning block may span multiple lines.
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    # Remove any remaining orphaned opening or closing think tags
    # (e.g. when generation was truncated mid-block).
    text = re.sub(r'</?think>', '', text)
    # Clean up extra whitespace left behind by the removed blocks
    text = re.sub(r'\n\s*\n', '\n', text).strip()
    return text
|
| 35 |
+
|
| 36 |
+
|
| 37 |
@spaces.GPU
|
| 38 |
def respond(message, history):
|
| 39 |
"""
|
|
|
|
| 82 |
# Decode - skip special tokens to filter out think tokens
|
| 83 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 84 |
|
| 85 |
+
# Filter out <think> tags and their content
|
| 86 |
+
response = filter_think_tags(response)
|
| 87 |
+
|
| 88 |
# For streaming effect, yield character by character
|
| 89 |
partial_response = ""
|
| 90 |
for char in response:
|