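"""Gradio chat demo for Qwen/Qwen3-VL-2B-Instruct.

Supports text chat with an optional image input. Written for Hugging Face
Spaces; the `spaces` import provides the ZeroGPU decorator used below.
"""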
import gradio as gr
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
import torch
from PIL import Image
import io
import base64
import spaces
# Load the model in bfloat16; device_map="auto" lets accelerate place the
# weights on the available GPU (falling back to CPU if none is present)
model = Qwen3VLForConditionalGeneration.from_pretrained(
"Qwen/Qwen3-VL-2B-Instruct",
torch_dtype=torch.bfloat16,
device_map="auto"
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-2B-Instruct")
def process_image(image):
    """Convert a PIL image to a base64 data-URI string.

    Note: currently unused, since the processor's chat template accepts PIL
    images directly; kept as a helper for URL/base64-based inputs.
    """
if isinstance(image, str):
return image
if isinstance(image, Image.Image):
buffered = io.BytesIO()
image.save(buffered, format="PNG")
img_str = base64.b64encode(buffered.getvalue()).decode()
return f"data:image/png;base64,{img_str}"
return image
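
# On Hugging Face ZeroGPU Spaces, @spaces.GPU requests a GPU for the duration
# of each call (here capped at 120 seconds); outside of Spaces the decorator
# is expected to be a no-op, depending on the installed `spaces` package.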
@spaces.GPU(duration=120)
def qwen_chat(message, image, chat_history):
"""
Process chat message with optional image input
Args:
message (str): User's text message
image: Optional image input
        chat_history (list): Previous conversation in the gr.Chatbot
            "messages" format, i.e. a list of {"role": ..., "content": ...} dicts
    Returns:
        tuple: (updated chat history, empty string to clear the message box)
"""
if not message and image is None:
return chat_history, ""
# Build messages list
messages = []
    # Add previous turns; chat_history arrives in the gr.Chatbot "messages"
    # format, a list of {"role": ..., "content": ...} dicts
    for prev in chat_history:
        messages.append({"role": prev["role"], "content": [{"type": "text", "text": prev["content"]}]})
# Add current message with optional image
current_content = []
if image is not None:
current_content.append({
"type": "image",
"image": image
})
if message:
current_content.append({
"type": "text",
"text": message
})
messages.append({
"role": "user",
"content": current_content
})
    # apply_chat_template renders the conversation into the model's chat format
    # and, with tokenize=True, runs the full processor (including image
    # preprocessing), returning ready-to-use tensors
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
inputs = inputs.to(model.device)
# Generate response
with torch.no_grad():
generated_ids = model.generate(**inputs, max_new_tokens=256)
    # Trim the prompt tokens from each sequence so only the newly
    # generated reply is decoded
generated_ids_trimmed = [
out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
generated_ids_trimmed,
skip_special_tokens=True,
clean_up_tokenization_spaces=False
)[0]
    # Append the new turn in the "messages" format expected by
    # gr.Chatbot(type="messages")
    chat_history.append({"role": "user", "content": message if message else "[Image provided]"})
    chat_history.append({"role": "assistant", "content": output_text})
return chat_history, ""
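
# Note: for token-by-token streaming one could run model.generate in a thread
# with transformers.TextIteratorStreamer and yield partial histories; this demo
# keeps things simple and returns the full reply at once.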
# Create Gradio interface
with gr.Blocks(title="Qwen3-VL Chat") as demo:
gr.Markdown(
"""
# 🎨 Qwen3-VL Chat
        Chat with Qwen3-VL-2B-Instruct, a multimodal model that understands both text and images.
[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
"""
)
with gr.Row():
with gr.Column(scale=3):
chatbot = gr.Chatbot(
label="Chat History",
type="messages",
height=600,
show_copy_button=True
)
with gr.Column(scale=1):
image_input = gr.Image(
label="Upload Image (Optional)",
type="pil",
sources=["upload", "clipboard"],
interactive=True
)
with gr.Row():
message_input = gr.Textbox(
label="Message",
placeholder="Type your message here...",
lines=2,
scale=4
)
send_btn = gr.Button("Send", scale=1, variant="primary")
with gr.Row():
clear_btn = gr.Button("Clear Chat", variant="secondary")
gr.Markdown(
"""
### Tips:
- Upload an image to ask questions about it
- Describe what you see or ask for analysis
- The model can answer questions about images and text
"""
)
    # Event handlers: qwen_chat can be wired to the click/submit events
    # directly, no wrapper needed
send_btn.click(
        qwen_chat,
inputs=[message_input, image_input, chatbot],
outputs=[chatbot, message_input]
)
message_input.submit(
        qwen_chat,
inputs=[message_input, image_input, chatbot],
outputs=[chatbot, message_input]
)
clear_btn.click(
lambda: ([], None, ""),
outputs=[chatbot, image_input, message_input]
)
if __name__ == "__main__":
demo.launch(share=False)
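
# To get a temporary public gradio.live link when running locally, launch with
# share=True instead; on Spaces the app is already publicly served.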