tiny-purr-1b

Sleeping

App Files Files Community

FlameF0X committed on Oct 10

Commit

982ecb5

verified ·

1 Parent(s): ffc7a61

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -58

app.py CHANGED Viewed

@@ -2,26 +2,23 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
 import torch
-# Model options
-model_options = {
-    "Tiny-Purr-350M-merged": "purrgpt-community/Tiny-Purr-350M-merged",
-    "Tiny-Purr-1B": "purrgpt-community/Tiny-Purr-1B"
-}
-# Load models and tokenizers
-models = {}
-tokenizers = {}
-for name, model_id in model_options.items():
-    tokenizers[name] = AutoTokenizer.from_pretrained(model_id)
-    models[name] = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        torch_dtype=torch.bfloat16
-    )
-    models[name].eval()
-# PurrBERT safety model
 purrbert_model = DistilBertForSequenceClassification.from_pretrained("purrgpt-community/PurrBERT-v1")
 purrbert_tokenizer = DistilBertTokenizerFast.from_pretrained("purrgpt-community/PurrBERT-v1")
 purrbert_model.eval()
@@ -34,12 +31,13 @@ SAFETY_RESPONSE = (
     "let's keep our conversations on the good side, okay? purrrr."
 )
 SYSTEM_PROMPT = (
-    "<|system|>\n"
-    "You are Tiny-Purr, a friendly, sarcastic, playful AI assistant in the form of a cat developed by PurrGPT Community. "
-    "You respond in a fun, cat-like personality, sometimes using puns and playful humor. "
-    "Always keep your replies safe and friendly.\n"
-    "<|system|>\n"
 )
 def is_safe_prompt(prompt):
@@ -47,60 +45,79 @@ def is_safe_prompt(prompt):
     with torch.no_grad():
         outputs = purrbert_model(**inputs)
         pred = torch.argmax(outputs.logits, dim=-1).item()
-    return pred == 0  # True if SAFE, False if FLAGGED
 def format_history(history, message):
     chat_prompt = SYSTEM_PROMPT
     for user_msg, assistant_msg in history:
-        chat_prompt += f"<|user|>\n{user_msg}\n<|assistant|>\n{assistant_msg}\n"
-    chat_prompt += f"<|user|>\n{message}\n<|assistant|>\n"
     return chat_prompt
-def respond(message, history, model_choice):
-    # Safety check
     if not is_safe_prompt(message):
         return SAFETY_RESPONSE
-    tokenizer = tokenizers[model_choice]
-    model = models[model_choice]
     full_prompt = format_history(history, message)
     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=512,
-            temperature=0.4,
-            top_p=0.75,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     generated_text = response[len(full_prompt):].strip()
-    if "\n<|user|>" in generated_text:
-        assistant_response = generated_text.split("\n<|user|>")[0].strip()
     else:
         assistant_response = generated_text.strip()
-    return assistant_response
-# Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("## Tiny-Purr Chat with Model Selection")
-    model_selector = gr.Dropdown(choices=list(model_options.keys()), value="Tiny-Purr-350M-merged", label="Choose Model")
-    chat = gr.Chatbot()
-    msg = gr.Textbox(label="Your Message")
-    submit_btn = gr.Button("Send")
-    def chat_interaction(message, history, model_choice):
-        response = respond(message, history, model_choice)
-        history = history + [(message, response)]
-        return history, history
-    submit_btn.click(chat_interaction, [msg, chat, model_selector], [chat, chat])
-    msg.submit(chat_interaction, [msg, chat, model_selector], [chat, chat])
-demo.launch()

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
 import torch
+from transformers import StoppingCriteria, StoppingCriteriaList
+# -----------------------------
+# 1. Load Tiny-Purr-1B
+# -----------------------------
+model_id = "purrgpt-community/Tiny-Purr-1B"  # replace with your merged model path
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.bfloat16
+)
+model.eval()
+# -----------------------------
+# 2. Load PurrBERT safety model
+# -----------------------------
 purrbert_model = DistilBertForSequenceClassification.from_pretrained("purrgpt-community/PurrBERT-v1")
 purrbert_tokenizer = DistilBertTokenizerFast.from_pretrained("purrgpt-community/PurrBERT-v1")
 purrbert_model.eval()
     "let's keep our conversations on the good side, okay? purrrr."
 )
+# -----------------------------
+# 3. New chat format / template
+# -----------------------------
 SYSTEM_PROMPT = (
+    "<|startoftext|><|im_start|>system\n"
+    "You are Tiny-Purr, a friendly, playful, cat-like AI assistant developed by PurrGPT Community. "
+    "You respond in a fun, witty, and helpful manner, sometimes using puns or playful humor.\n<|im_end|>\n"
 )
 def is_safe_prompt(prompt):
     with torch.no_grad():
         outputs = purrbert_model(**inputs)
         pred = torch.argmax(outputs.logits, dim=-1).item()
+    return pred == 0  # True if SAFE
 def format_history(history, message):
     chat_prompt = SYSTEM_PROMPT
     for user_msg, assistant_msg in history:
+        chat_prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
+        chat_prompt += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
+    chat_prompt += f"<|im_start|>user\n{message}<|im_end|>\n"
+    chat_prompt += f"<|im_start|>assistant\n"
     return chat_prompt
class StopOnUserTag(StoppingCriteria):
    """Stopping criterion that ends generation when a new user turn starts.

    The text ``"<|im_start|>user"`` is encoded once (without special tokens
    added around it) and generation is stopped as soon as the tail of the
    first sequence in the batch equals that token-id sequence.

    Only ``input_ids[0]`` is inspected, so this is meant for single-sequence
    generation.
    """

    def __init__(self, tokenizer):
        """Pre-compute the token ids of the user-turn marker.

        Args:
            tokenizer: Tokenizer exposing ``encode(text, add_special_tokens=...)``.
        """
        super().__init__()
        self.stop_token_ids = tokenizer.encode("<|im_start|>user", add_special_tokens=False)

    def __call__(self, input_ids, scores, **kwargs):
        """Return True when the generated sequence ends with the stop marker.

        Args:
            input_ids: Batch of token-id sequences generated so far.
            scores: Prediction scores (unused).
            **kwargs: Extra arguments forwarded by the generation loop;
                accepted for compatibility with the base-class signature.

        Returns:
            ``True`` if the last tokens of ``input_ids[0]`` equal the stop
            sequence, otherwise ``False``.
        """
        stop_len = len(self.stop_token_ids)
        # An empty stop sequence can never be "seen"; never stop on it.
        if stop_len == 0:
            return False
        sequence = input_ids[0]
        if len(sequence) < stop_len:
            return False
        return sequence[-stop_len:].tolist() == self.stop_token_ids


stop_criteria = StoppingCriteriaList([StopOnUserTag(tokenizer)])
def clean_repetition(text, max_repeat=3):
    """Drop lines that repeat more than ``max_repeat`` times in ``text``.

    Each distinct non-blank line is kept for its first ``max_repeat``
    occurrences (anywhere in the text, not only consecutively); further
    occurrences are removed. Blank or whitespace-only lines are always kept:
    they are paragraph separators, and counting them as "repeats" would glue
    later paragraphs together.

    Args:
        text: The text to clean.
        max_repeat: Maximum number of times an identical line may appear.

    Returns:
        The kept lines joined with ``"\\n"``. Line endings are normalised by
        ``str.splitlines`` and a trailing newline is not preserved.
    """
    occurrences = {}
    kept = []
    for line in text.splitlines():
        if not line.strip():
            # Paragraph breaks are structure, not repetition.
            kept.append(line)
            continue
        occurrences[line] = occurrences.get(line, 0) + 1
        if occurrences[line] <= max_repeat:
            kept.append(line)
    return "\n".join(kept)
def respond(message, history):
    """Generate the assistant reply for ``message`` given the chat ``history``.

    The message is first checked with ``is_safe_prompt``; a flagged message
    short-circuits to ``SAFETY_RESPONSE`` without calling the model.
    Otherwise the prompt is built with ``format_history``, the model samples
    up to 512 new tokens, and the reply is extracted and passed through
    ``clean_repetition``.

    Args:
        message: The new user message.
        history: Previous ``(user_msg, assistant_msg)`` pairs.

    Returns:
        The assistant's reply text, or ``SAFETY_RESPONSE`` for a flagged
        message.
    """
    if not is_safe_prompt(message):
        return SAFETY_RESPONSE

    full_prompt = format_history(history, message)
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            typical_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            stopping_criteria=stop_criteria,
        )

    # Decode only the newly generated tokens. Slicing the decoded full
    # sequence by len(full_prompt) is unreliable: when the chat markers are
    # special tokens they are dropped by skip_special_tokens=True, so the
    # decoded text is shorter than the prompt and the slice eats the reply.
    prompt_length = inputs["input_ids"].shape[-1]
    new_token_ids = outputs[0][prompt_length:]
    # Keep special tokens for now so the turn markers can still be found.
    generated_text = tokenizer.decode(new_token_ids, skip_special_tokens=False)

    # Cut at the start of a hallucinated user turn or at the end of the
    # assistant turn, whichever comes first.
    for marker in ("<|im_start|>user", "<|im_end|>"):
        generated_text = generated_text.split(marker)[0]

    # Remove any remaining special tokens (e.g. EOS/padding) from the text.
    for special_token in tokenizer.all_special_tokens:
        generated_text = generated_text.replace(special_token, "")

    assistant_response = clean_repetition(generated_text.strip())
    return assistant_response
+# -----------------------------
+# 4. Launch Gradio Chat
+# -----------------------------
+gr.ChatInterface(
+    respond,
+    title="Tiny-Purr-1B Chat",
+    description="Protected by PurrBERT-v1 for safety!",
+    examples=[
+        "What's your favorite kind of cat?",
+        "Explain quantum entanglement simply.",
+        "Write me a haiku about the moon."
+    ]
+).launch()