# Hugging Face Space (status at capture time: "Runtime error")
# Gradio ChatInterface serving DeepSeek-R1-Distill-Llama-8B with 4-bit quantization.
import gradio as gr
import spaces
import torch
from transformers import BitsAndBytesConfig, pipeline

# 4-bit NF4 quantization so the 8B model fits in limited GPU memory;
# double quantization shaves a little more off the footprint.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Text-generation pipeline for the distilled DeepSeek-R1 Llama-8B model.
# NOTE(review): recent transformers releases do not accept
# `quantization_config` as a top-level `pipeline()` kwarg — it must be
# forwarded to the model constructor via `model_kwargs`, otherwise the
# Space crashes at startup.
model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    model_kwargs={"quantization_config": quant_config},
    device_map="auto",
)
# The original's stray comment "Increased to 5 minutes" strongly suggests a
# stripped ZeroGPU decorator — presumably @spaces.GPU(duration=300);
# TODO(review): confirm against the Space's commit history.
@spaces.GPU(duration=300)
def chat_response(message, history):
    """Generate one assistant reply using the module-level `model` pipeline.

    Args:
        message: The user's latest message (str).
        history: Prior turns in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts (matches the
            ``type="messages"`` Chatbot below).

    Returns:
        The assistant's reply text, or an error string if generation
        fails with a RuntimeError (e.g. GPU allocation timeout).
    """
    # Full conversation: prior turns plus the new user message.
    # The original called `pipeline(...)` / `chat_response.pipe(...)` with a
    # literal Ellipsis, which raises at runtime; use the existing `model`.
    messages = list(history) + [{"role": "user", "content": message}]
    try:
        output = model(messages, max_new_tokens=1024)
        # Chat pipelines return [{"generated_text": [<messages...>]}];
        # the last entry is the newly generated assistant turn.
        return output[0]["generated_text"][-1]["content"]
    except RuntimeError as e:
        # Surface GPU failures to the UI instead of crashing the app.
        return f"GPU timeout: {str(e)}"
# Gradio 5 removed the `retry_btn`/`undo_btn` arguments from ChatInterface;
# passing them raises TypeError at startup — the likely cause of this
# Space's "Runtime error". Retry/undo are built into the v5 chatbot UI.
demo = gr.ChatInterface(
    chat_response,
    # type="messages" matches the dict-based history `chat_response` expects.
    chatbot=gr.Chatbot(height=500, type="messages"),
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
)

demo.launch()