Spaces:
Runtime error
| import gradio as gr | |
| import random | |
| import time | |
| import torch | |
| import bitsandbytes | |
| import accelerate | |
| import peft | |
| # Use a pipeline as a high-level helper | |
| # Use a pipeline as a high-level helper | |
| from transformers import pipeline | |
| from transformers import BitsAndBytesConfig | |
| from transformers import AutoTokenizer,AutoModelForCausalLM | |
# 4-bit NF4 quantization config so the 7B model fits in limited GPU memory.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,   # nested quantization: saves ~0.4 bits/param
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Fix: AutoTokenizer.from_pretrained does not accept quantization_config —
# quantization applies to model weights only, not to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained("llSourcell/medllama2_7b")

model = AutoModelForCausalLM.from_pretrained(
    "llSourcell/medllama2_7b",
    quantization_config=nf4_config,
    # Fix: 4-bit bitsandbytes weights must be placed on an accelerator;
    # device_map="auto" lets accelerate dispatch them correctly.
    device_map="auto",
)
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Generate a model reply for *message* and append the exchange.

        Returns ("", updated_history): empty string clears the textbox,
        the history list updates the Chatbot component.
        """
        # Fix: move inputs to the model's device (quantized weights live on GPU).
        inputs = tokenizer(message, return_tensors="pt").to(model.device)
        # Fix: max_length=30 counted the prompt tokens too, so replies were
        # truncated — or empty once the prompt exceeded 30 tokens.
        # max_new_tokens bounds only the generated continuation.
        generate_ids = model.generate(inputs.input_ids, max_new_tokens=256)
        # Fix: decode only the newly generated tokens; decoding the full
        # sequence echoed the user's prompt back into the bot reply.
        new_tokens = generate_ids[:, inputs.input_ids.shape[1]:]
        bot_message = tokenizer.batch_decode(
            new_tokens,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
        chat_history.append((message, bot_message))
        # Removed leftover time.sleep(2) from the Gradio demo template —
        # it only added artificial latency to every reply.
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()