# Hugging Face Space (status at capture time: "Runtime error")
# Gradio ChatInterface serving DeepSeek-R1-Distill-Llama-8B with 4-bit quantization.
import gradio as gr
import spaces
import torch
from transformers import BitsAndBytesConfig, pipeline

# 4-bit NF4 quantization so the 8B model fits in limited GPU memory;
# double quantization shaves a little more off the footprint.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Text-generation pipeline for the distilled DeepSeek-R1 Llama-8B model.
# NOTE(review): recent transformers releases do not accept
# `quantization_config` as a top-level `pipeline()` kwarg — it must be
# forwarded to the model constructor via `model_kwargs`, otherwise the
# Space crashes at startup.
model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    model_kwargs={"quantization_config": quant_config},
    device_map="auto",
)
# The original's stray comment "Increased to 5 minutes" strongly suggests a
# stripped ZeroGPU decorator — presumably @spaces.GPU(duration=300);
# TODO(review): confirm against the Space's commit history.
@spaces.GPU(duration=300)
def chat_response(message, history):
    """Generate one assistant reply using the module-level `model` pipeline.

    Args:
        message: The user's latest message (str).
        history: Prior turns in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts (matches the
            ``type="messages"`` Chatbot below).

    Returns:
        The assistant's reply text, or an error string if generation
        fails with a RuntimeError (e.g. GPU allocation timeout).
    """
    # Full conversation: prior turns plus the new user message.
    # The original called `pipeline(...)` / `chat_response.pipe(...)` with a
    # literal Ellipsis, which raises at runtime; use the existing `model`.
    messages = list(history) + [{"role": "user", "content": message}]
    try:
        output = model(messages, max_new_tokens=1024)
        # Chat pipelines return [{"generated_text": [<messages...>]}];
        # the last entry is the newly generated assistant turn.
        return output[0]["generated_text"][-1]["content"]
    except RuntimeError as e:
        # Surface GPU failures to the UI instead of crashing the app.
        return f"GPU timeout: {str(e)}"
# Gradio 5 removed the `retry_btn`/`undo_btn` arguments from ChatInterface;
# passing them raises TypeError at startup — the likely cause of this
# Space's "Runtime error". Retry/undo are built into the v5 chatbot UI.
demo = gr.ChatInterface(
    chat_response,
    # type="messages" matches the dict-based history `chat_response` expects.
    chatbot=gr.Chatbot(height=500, type="messages"),
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
)

demo.launch()