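"""Gradio chat demo for Qwen/Qwen3-VL-2B-Instruct with text and image input.

Intended for a Hugging Face ZeroGPU Space. Dependencies inferred from the
imports below: gradio, transformers, torch, pillow, and spaces.
"""
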
import gradio as gr
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
import torch
from PIL import Image
import io
import base64
import spaces

# Load model and processor
model = Qwen3VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen3-VL-2B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-2B-Instruct")
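
# Note: on a ZeroGPU Space the GPU is attached only while a
# @spaces.GPU-decorated function is running; loading with device_map="auto"
# at import time is the usual pattern for these Spaces (assumption based on
# the `spaces` import and the Space's "Running on Zero" status).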


def process_image(image):
    """Convert an image to a base64 data URI for processing."""
    if isinstance(image, str):
        return image
    if isinstance(image, Image.Image):
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        return f"data:image/png;base64,{img_str}"
    return image


# @spaces.GPU requests a GPU for the duration of this call on ZeroGPU
@spaces.GPU
def qwen_chat(message, image, chat_history):
    """
    Process a chat message with an optional image input.

    Args:
        message (str): User's text message
        image: Optional PIL image input
        chat_history (list): Previous conversation in Gradio "messages"
            format (dicts with "role" and "content" keys)

    Returns:
        tuple: Updated chat history and an empty string to clear the message box
    """
    if not message and image is None:
        return chat_history, ""

    # Rebuild the model-facing message list from the Gradio history
    # (the Chatbot uses type="messages", so each entry is a role/content dict)
    messages = []
    for past in chat_history:
        messages.append({"role": past["role"], "content": [{"type": "text", "text": past["content"]}]})

    # Add the current message, with the image first if one was provided
    current_content = []
    if image is not None:
        # Pass the image as a base64 data URI via the helper above; recent
        # transformers chat templates also accept a PIL image here directly
        current_content.append({
            "type": "image",
            "image": process_image(image)
        })
    if message:
        current_content.append({
            "type": "text",
            "text": message
        })
    messages.append({
        "role": "user",
        "content": current_content
    })

    # Prepare inputs
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt"
    )
    inputs = inputs.to(model.device)
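    # `inputs` is a BatchEncoding holding input_ids and attention_mask plus
    # the processor's vision tensors (pixel_values and friends), all of which
    # are forwarded to generate() below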

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=256)

    # Decode only the newly generated tokens, stripping the prompt
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    # Append the exchange in the "messages" format the Chatbot expects
    chat_history.append({"role": "user", "content": message if message else "[Image provided]"})
    chat_history.append({"role": "assistant", "content": output_text})
    return chat_history, ""


# Create Gradio interface
with gr.Blocks(title="Qwen3-VL Chat") as demo:
    gr.Markdown(
        """
        # 🎨 Qwen3-VL Chat

        Chat with Qwen3-VL-2B-Instruct, a multimodal model that understands both text and images.

        [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat History",
                type="messages",
                height=600,
                show_copy_button=True
            )
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Image (Optional)",
                type="pil",
                sources=["upload", "clipboard"],
                interactive=True
            )

    with gr.Row():
        message_input = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")

    gr.Markdown(
        """
        ### Tips:
        - Upload an image to ask questions about it
        - Describe what you see or ask for analysis
        - The model can answer questions about images and text
        """
    )

    # Event handlers: send on button click or submit; clearing resets all inputs
    send_btn.click(
        qwen_chat,
        inputs=[message_input, image_input, chatbot],
        outputs=[chatbot, message_input]
    )
    message_input.submit(
        qwen_chat,
        inputs=[message_input, image_input, chatbot],
        outputs=[chatbot, message_input]
    )
    clear_btn.click(
        lambda: ([], None, ""),
        outputs=[chatbot, image_input, message_input]
    )
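
# Gradio 4+ queues events by default, so no explicit demo.queue() call is
# needed here, though ZeroGPU Spaces often add one to cap concurrency.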

if __name__ == "__main__":
    demo.launch(share=False)