Spaces:
Sleeping
Sleeping
| import gradio as gr | |
# Template definitions.
#
# Each entry describes how to wrap system/user/assistant messages for one
# chat-template family. Keys used by the formatters below:
#   system_start / system_end       - wrappers around the system message
#   user_start / user_end           - wrappers around each user message
#   assistant_start / assistant_end - wrappers around each assistant message
#   bos / eos                       - sequence-level tokens ("" when unused)
#   first_user_start (optional)     - Llama 2 only: opener for the first user
#                                     turn when there is no system prompt
#   input_start / input_end (optional) - Alpaca only: "### Input:" section
TEMPLATES = {
    "ChatML (Qwen, OpenHermes, etc.)": {
        "system_start": "<|im_start|>system\n",
        "system_end": "<|im_end|>\n",
        "user_start": "<|im_start|>user\n",
        "user_end": "<|im_end|>\n",
        "assistant_start": "<|im_start|>assistant\n",
        "assistant_end": "<|im_end|>\n",
        "bos": "",
        "eos": "",
    },
    "Llama 2 / Mistral": {
        "system_start": "[INST] <<SYS>>\n",
        "system_end": "\n<</SYS>>\n\n",
        "user_start": "",
        "user_end": " [/INST] ",
        "assistant_start": "",
        "assistant_end": "</s><s>[INST] ",
        "bos": "<s>",
        "eos": "</s>",
        "first_user_start": "[INST] ",  # When no system prompt
    },
    "Llama 3 / Llama 3.1": {
        "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
        "system_end": "<|eot_id|>",
        "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
        "user_end": "<|eot_id|>",
        "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
        "assistant_end": "<|eot_id|>",
        "bos": "<|begin_of_text|>",
        "eos": "",
    },
    "Alpaca": {
        "system_start": "",
        "system_end": "",
        "user_start": "### Instruction:\n",
        "user_end": "\n\n",
        "input_start": "### Input:\n",
        "input_end": "\n\n",
        "assistant_start": "### Response:\n",
        "assistant_end": "\n\n",
        "bos": "",
        "eos": "",
    },
    "Vicuna": {
        "system_start": "",
        "system_end": "\n\n",
        "user_start": "USER: ",
        "user_end": "\n",
        "assistant_start": "ASSISTANT: ",
        "assistant_end": "</s>\n",
        "bos": "",
        "eos": "",
    },
    "Gemma": {
        "system_start": "",
        "system_end": "",
        "user_start": "<start_of_turn>user\n",
        "user_end": "<end_of_turn>\n",
        "assistant_start": "<start_of_turn>model\n",
        "assistant_end": "<end_of_turn>\n",
        "bos": "<bos>",
        "eos": "",
    },
    "Phi-3": {
        "system_start": "<|system|>\n",
        "system_end": "<|end|>\n",
        "user_start": "<|user|>\n",
        "user_end": "<|end|>\n",
        "assistant_start": "<|assistant|>\n",
        "assistant_end": "<|end|>\n",
        "bos": "",
        "eos": "",
    },
    "Zephyr": {
        "system_start": "<|system|>\n",
        "system_end": "</s>\n",
        "user_start": "<|user|>\n",
        "user_end": "</s>\n",
        "assistant_start": "<|assistant|>\n",
        "assistant_end": "</s>\n",
        "bos": "",
        "eos": "",
    },
    "Command-R": {
        "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
        "system_end": "<|END_OF_TURN_TOKEN|>",
        "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
        "user_end": "<|END_OF_TURN_TOKEN|>",
        "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
        "assistant_end": "<|END_OF_TURN_TOKEN|>",
        "bos": "<BOS_TOKEN>",
        "eos": "",
    },
    "DeepSeek": {
        "system_start": "",
        "system_end": "\n\n",
        "user_start": "User: ",
        "user_end": "\n\n",
        "assistant_start": "Assistant: ",
        # NOTE: DeepSeek's sentence tokens use FULLWIDTH VERTICAL LINE (U+FF5C)
        # and LOWER ONE EIGHTH BLOCK (U+2581), not ASCII pipes/underscores.
        # (The previous values were mojibake of these tokens.)
        "assistant_end": "<｜end▁of▁sentence｜>",
        "bos": "<｜begin▁of▁sentence｜>",
        "eos": "",
    },
    "Raw (No Template)": {
        "system_start": "System: ",
        "system_end": "\n\n",
        "user_start": "User: ",
        "user_end": "\n\n",
        "assistant_start": "Assistant: ",
        "assistant_end": "\n\n",
        "bos": "",
        "eos": "",
    },
}
# Model to template mapping.
# Known model ids grouped by the template each one uses; inverted below into
# the flat model-id -> template-name lookup consumed by
# get_template_from_model().
_MODELS_BY_TEMPLATE = {
    "ChatML (Qwen, OpenHermes, etc.)": (
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "teknium/OpenHermes-2.5-Mistral-7B",
        "NousResearch/Hermes-2-Pro-Mistral-7B",
    ),
    "Llama 2 / Mistral": (
        "mistralai/Mistral-7B-Instruct-v0.2",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "meta-llama/Llama-2-7b-chat-hf",
    ),
    "Llama 3 / Llama 3.1": (
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
    ),
    "Gemma": (
        "google/gemma-1.1-7b-it",
        "google/gemma-2-9b-it",
    ),
    "Phi-3": ("microsoft/Phi-3-mini-4k-instruct",),
    "Zephyr": ("HuggingFaceH4/zephyr-7b-beta",),
    "Command-R": ("CohereForAI/c4ai-command-r-v01",),
    "DeepSeek": ("deepseek-ai/deepseek-coder-7b-instruct-v1.5",),
}

MODEL_TEMPLATES = {
    model: template
    for template, models in _MODELS_BY_TEMPLATE.items()
    for model in models
}
def format_prompt(template_name, system_msg, user_msg, assistant_msg, include_generation_prompt):
    """Render a single-turn exchange with the selected chat template.

    Empty/whitespace-only messages are skipped. Returns the formatted
    prompt string, or the literal "Template not found" for an unknown
    template name.
    """
    if template_name not in TEMPLATES:
        return "Template not found"

    tpl = TEMPLATES[template_name]
    parts = []

    # Sequence-level BOS token, when the template defines one.
    if tpl.get("bos"):
        parts.append(tpl["bos"])

    system = system_msg.strip()
    if system:
        parts.append(tpl["system_start"] + system + tpl["system_end"])

    user = user_msg.strip()
    if user:
        # Llama 2 uses a bare "[INST] " opener when there is no system block.
        if template_name == "Llama 2 / Mistral" and not system:
            opener = tpl.get("first_user_start", tpl["user_start"])
        else:
            opener = tpl["user_start"]
        parts.append(opener + user + tpl["user_end"])

    # Optional assistant turn (for showing a multi-turn continuation).
    assistant = assistant_msg.strip()
    if assistant:
        parts.append(tpl["assistant_start"] + assistant + tpl["assistant_end"])

    # Trailing assistant-start marker cues the model to begin generating.
    if include_generation_prompt:
        parts.append(tpl["assistant_start"])

    return "".join(parts)
def format_multi_turn(template_name, conversation_text, include_generation_prompt):
    """Render a multi-turn transcript with the selected chat template.

    Each non-blank line of *conversation_text* is expected to begin with
    "System:", "User:" or "Assistant:" (case-insensitive); lines with no
    recognized prefix are ignored. Only the last "System:" line is kept.
    """
    if template_name not in TEMPLATES:
        return "Template not found"

    tpl = TEMPLATES[template_name]

    # Parse the line-per-message transcript into (role, content) turns.
    system_msg = ""
    turns = []
    for raw in conversation_text.strip().split("\n"):
        line = raw.strip()
        lowered = line.lower()
        if lowered.startswith("system:"):
            system_msg = line[len("system:"):].strip()
        elif lowered.startswith("user:"):
            turns.append(("user", line[len("user:"):].strip()))
        elif lowered.startswith("assistant:"):
            turns.append(("assistant", line[len("assistant:"):].strip()))

    # Sequence-level BOS token, when the template defines one.
    pieces = [tpl["bos"]] if tpl.get("bos") else []

    if system_msg:
        pieces.append(tpl["system_start"] + system_msg + tpl["system_end"])

    for index, (role, content) in enumerate(turns):
        if role == "user":
            # Llama 2's first user turn uses a bare "[INST] " opener when
            # there is no system prompt.
            if template_name == "Llama 2 / Mistral" and index == 0 and not system_msg:
                pieces.append(tpl.get("first_user_start", tpl["user_start"]))
            else:
                pieces.append(tpl["user_start"])
            pieces.append(content + tpl["user_end"])
        else:  # assistant turn
            pieces.append(tpl["assistant_start"] + content + tpl["assistant_end"])

    # Trailing assistant-start marker cues the model to begin generating.
    if include_generation_prompt:
        pieces.append(tpl["assistant_start"])

    return "".join(pieces)
def get_template_from_model(model_name):
    """Resolve a model id to a template name, falling back to ChatML.

    Exact matches in MODEL_TEMPLATES win; otherwise the name is matched
    against known substrings, checked in priority order (so "llama-3"
    is tested before "llama-2"/"mistral").
    """
    known = MODEL_TEMPLATES.get(model_name)
    if known is not None:
        return known

    lowered = model_name.lower()
    heuristics = (
        (("qwen", "hermes"), "ChatML (Qwen, OpenHermes, etc.)"),
        (("llama-3", "llama3"), "Llama 3 / Llama 3.1"),
        (("llama-2", "mistral"), "Llama 2 / Mistral"),
        (("gemma",), "Gemma"),
        (("phi",), "Phi-3"),
        (("zephyr",), "Zephyr"),
        (("deepseek",), "DeepSeek"),
    )
    for needles, template in heuristics:
        if any(needle in lowered for needle in needles):
            return template

    # Unknown model: ChatML is the most common modern default.
    return "ChatML (Qwen, OpenHermes, etc.)"
def generate_code_snippet(template_name, system_msg, user_msg):
    """Emit a ready-to-run transformers example for the current messages.

    The snippet uses ``tokenizer.apply_chat_template`` rather than the manual
    template strings, so it works for any model id the user substitutes.
    (*template_name* is accepted for interface parity with the other
    generators but does not affect the emitted code.)
    """
    return f'''from transformers import AutoTokenizer, AutoModelForCausalLM
model_id = "YOUR_MODEL_HERE"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
messages = [
    {{"role": "system", "content": """{system_msg}"""}},
    {{"role": "user", "content": """{user_msg}"""}}
]
# Apply chat template
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
# Generate
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
'''
def generate_llama_cpp_snippet(formatted_prompt):
    """Generate llama-cpp-python example code for *formatted_prompt*.

    The prompt is escaped (backslashes, double quotes, newlines) so the
    emitted snippet remains a single valid Python string literal even when
    the prompt contains quotes, newlines, or trailing special tokens.
    Previously the escaped value was computed but never used, so prompts
    containing quote sequences produced broken snippet code.
    """
    # Escape the prompt for a double-quoted Python string literal.
    escaped = formatted_prompt.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
    code = f'''# For llama-cpp-python
from llama_cpp import Llama
llm = Llama(model_path="your_model.gguf", n_ctx=4096)
prompt = "{escaped}"
output = llm(
    prompt,
    max_tokens=256,
    stop=["<|im_end|>", "</s>", "<|eot_id|>"],  # Adjust based on template
    echo=False
)
print(output["choices"][0]["text"])
'''
    return code
# ============== GRADIO UI ==============
# Top-level app layout: three tabs (single-turn formatter, multi-turn
# formatter, static reference) plus the click handlers wired at the bottom.
with gr.Blocks(title="π Prompt Template Tester", theme=gr.themes.Soft()) as demo:
    # Page header / byline.
    gr.Markdown("""
# π Prompt Template Tester
See exactly how your prompt gets formatted for different models.
Stop guessing why your model outputs garbage!
**By [AIencoder](https://huggingface.co/AIencoder)** π
""")
    with gr.Tabs():
        # --- Tab 1: format one system/user/assistant exchange ---
        with gr.TabItem("π¬ Single Turn"):
            with gr.Row():
                with gr.Column():
                    # Manual template choice; can also be auto-detected from
                    # a model id via the button below.
                    template_dropdown = gr.Dropdown(
                        label="Template Format",
                        choices=list(TEMPLATES.keys()),
                        value="ChatML (Qwen, OpenHermes, etc.)"
                    )
                    model_lookup = gr.Textbox(
                        label="Or enter model name to auto-detect",
                        placeholder="Qwen/Qwen2.5-7B-Instruct"
                    )
                    detect_btn = gr.Button("π Detect Template", variant="secondary", size="sm")
                    system_input = gr.Textbox(
                        label="System Message",
                        placeholder="You are a helpful coding assistant.",
                        lines=2
                    )
                    user_input = gr.Textbox(
                        label="User Message",
                        placeholder="Write a Python function to reverse a string",
                        lines=3
                    )
                    assistant_input = gr.Textbox(
                        label="Assistant Message (optional, for multi-turn)",
                        placeholder="Here's a function to reverse a string...",
                        lines=2
                    )
                    gen_prompt_checkbox = gr.Checkbox(
                        label="Include generation prompt (assistant start token)",
                        value=True
                    )
                    format_btn = gr.Button("π Format Prompt", variant="primary")
                with gr.Column():
                    # Right-hand side: formatted prompt, a rough length/token
                    # readout, and copy-paste code snippets.
                    formatted_output = gr.Code(
                        label="Formatted Prompt",
                        language=None,
                        lines=15
                    )
                    char_count = gr.Markdown("")
                    with gr.Accordion("π Python Code Snippet", open=False):
                        code_output = gr.Code(label="Transformers Code", language="python", lines=15)
                    with gr.Accordion("π¦ llama.cpp Code Snippet", open=False):
                        llama_output = gr.Code(label="llama-cpp-python Code", language="python", lines=12)
        # --- Tab 2: format a whole "Role: message" transcript ---
        with gr.TabItem("π Multi-Turn"):
            with gr.Row():
                with gr.Column():
                    multi_template = gr.Dropdown(
                        label="Template Format",
                        choices=list(TEMPLATES.keys()),
                        value="ChatML (Qwen, OpenHermes, etc.)"
                    )
                    # One message per line, prefixed System:/User:/Assistant:
                    # (parsed by format_multi_turn).
                    conversation_input = gr.Textbox(
                        label="Conversation (one message per line)",
                        placeholder="""System: You are a helpful assistant
User: Hello!
Assistant: Hi there! How can I help?
User: What's 2+2?""",
                        lines=10
                    )
                    multi_gen_prompt = gr.Checkbox(
                        label="Include generation prompt",
                        value=True
                    )
                    multi_format_btn = gr.Button("π Format Conversation", variant="primary")
                with gr.Column():
                    multi_output = gr.Code(
                        label="Formatted Conversation",
                        language=None,
                        lines=20
                    )
        # --- Tab 3: static cheat-sheet, no interactivity ---
        with gr.TabItem("π Template Reference"):
            gr.Markdown("""
## Template Formats
| Template | Used By | Special Tokens |
|----------|---------|----------------|
| **ChatML** | Qwen, OpenHermes, Nous Hermes | `<\|im_start\|>`, `<\|im_end\|>` |
| **Llama 2 / Mistral** | Llama 2, Mistral v0.1-0.3 | `[INST]`, `[/INST]`, `<<SYS>>` |
| **Llama 3** | Llama 3, Llama 3.1 | `<\|start_header_id\|>`, `<\|eot_id\|>` |
| **Alpaca** | Alpaca-style models | `### Instruction:`, `### Response:` |
| **Vicuna** | Vicuna models | `USER:`, `ASSISTANT:` |
| **Gemma** | Google Gemma | `<start_of_turn>`, `<end_of_turn>` |
| **Phi-3** | Microsoft Phi-3 | `<\|system\|>`, `<\|user\|>`, `<\|assistant\|>` |
| **Zephyr** | Zephyr models | `<\|system\|>`, `</s>` |
## Common Mistakes
β **Using wrong template** β Model outputs garbage or repeats prompt
β **Missing generation prompt** β Model doesn't know to start generating
β **Wrong stop tokens** β Model generates forever or stops too early
β **System prompt in wrong place** β Model ignores instructions
## Tips
β Always check the model card for the correct template
β Use `tokenizer.apply_chat_template()` when possible
β Test your prompts here before running expensive inference!
""")
    # Event handlers
    def update_char_count(text):
        # Rough token estimate: ~4 characters per token (English heuristic).
        return f"**Length:** {len(text)} characters, ~{len(text)//4} tokens"
    def on_format(template, system, user, assistant, gen_prompt):
        # Format the prompt, then derive the stats and both code snippets
        # from the same formatted string.
        formatted = format_prompt(template, system, user, assistant, gen_prompt)
        code = generate_code_snippet(template, system, user)
        llama = generate_llama_cpp_snippet(formatted)
        count = update_char_count(formatted)
        return formatted, count, code, llama
    def on_detect(model_name):
        # Thin wrapper so the click handler returns the dropdown's new value.
        return get_template_from_model(model_name)
    detect_btn.click(
        fn=on_detect,
        inputs=[model_lookup],
        outputs=[template_dropdown]
    )
    format_btn.click(
        fn=on_format,
        inputs=[template_dropdown, system_input, user_input, assistant_input, gen_prompt_checkbox],
        outputs=[formatted_output, char_count, code_output, llama_output]
    )
    multi_format_btn.click(
        fn=format_multi_turn,
        inputs=[multi_template, conversation_input, multi_gen_prompt],
        outputs=[multi_output]
    )
# 0.0.0.0 binding is required for container/Spaces deployment.
demo.launch(server_name="0.0.0.0", server_port=7860)