# app.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import spaces

# On a ZeroGPU Space this tensor stays on CPU at import time; it only moves to
# the GPU inside functions decorated with @spaces.GPU.
zero = torch.Tensor([0]).cuda()
# ─── CONFIG ───
MODEL_NAME = "Walid-Ahmed/finetuned_falcon_psychology-question-answer"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ─── LOAD TOKENIZER & MODEL ───
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.to(device)
model.eval()
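# Optional sketch (an assumption, not in the original app): load in half
# precision to roughly halve GPU memory for a Falcon-class model:
#   model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)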
# ─── PROMPT TEMPLATE ───
chat_prompt = """### Instruction:
{}
### Input:
{}
### Response:
{}"""
# ─── INFERENCE FUNCTION ───
@spaces.GPU  # required on ZeroGPU: allocates a GPU for the duration of the call
def answer_question(user_input: str, max_new_tokens: int = 64, temperature: float = 0.7):
    print(zero.device)  # <-- 'cuda:0' 🤗 inside a @spaces.GPU-decorated call
    # fill in the template
    prompt = chat_prompt.format("", user_input, "")
    # tokenize & move to device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # generate (sampled decoding; passing attention_mask avoids the
    # pad-token/attention-mask warning from transformers)
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # decode and keep only the text after "### Response:"
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = decoded.split("### Response:")[-1].strip()
    return response
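# Quick smoke test outside the UI (hypothetical usage, not part of the Space):
#   print(answer_question("Who is known for their work on classical conditioning?"))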
# ─── GRADIO INTERFACE ───
examples = [
    ["Who is known for their work on classical conditioning?"],
    ["What are effective CBT techniques for managing insomnia?"],
    ["How can someone cope with panic attacks in public places?"],
]
iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(lines=2, label="Psychology Question"),
        gr.Slider(16, 256, value=256, step=1, label="Max New Tokens"),
        # minimum raised from 0.0: temperature must be strictly positive when do_sample=True
        gr.Slider(0.1, 1.0, value=0.1, step=0.01, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    examples=examples,
    title="Falcon-PsychQA Demo",
    description="Enter a psychology-related question and get back your fine-tuned Falcon model's answer.",
    allow_flagging="never",
)
if __name__ == "__main__":
    iface.launch(debug=True)  # share=True is unsupported on Spaces and would be ignored
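# ─── DEPLOYMENT NOTE (assumed, not from the source) ───
# A minimal requirements.txt for this Space would list:
#   transformers
#   torch
#   gradio
# The `spaces` package is preinstalled on ZeroGPU hardware.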