AiCoderv2 committed
Commit 3720b00 · verified · 1 Parent(s): be9534f

Update Gradio app with multiple files

Files changed (5)
  1. app.py +64 -21
  2. config.py +8 -1
  3. models.py +43 -17
  4. requirements.txt +4 -2
  5. utils.py +21 -2
app.py CHANGED
@@ -1,42 +1,85 @@
 import gradio as gr
-from models import chat_with_model
-from config import MODEL_NAME
+import spaces
+from models import load_model, chat_with_model
+from config import AVAILABLE_MODELS, DEFAULT_MODEL
+from utils import format_conversation, save_chat_history, load_chat_history
 
-def chat_response(message, history):
-    # Format history for the model
-    conversation = []
-    for user_msg, assistant_msg in history:
-        conversation.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            conversation.append({"role": "assistant", "content": assistant_msg})
-    conversation.append({"role": "user", "content": message})
-
-    response = chat_with_model(conversation)
+# Global variable for current model
+current_model = None
+
+def switch_model(model_name):
+    global current_model
+    current_model = load_model(model_name)
+    return f"Switched to {model_name}"
+
+@spaces.GPU(duration=120)  # GPU acceleration for inference
+def generate_response(message, history, model_name, streaming=False):
+    global current_model
+    if current_model is None or current_model['name'] != model_name:
+        current_model = load_model(model_name)
+
+    conversation = format_conversation(message, history)
+    response = chat_with_model(current_model, conversation, streaming=streaming)
     return response
 
-with gr.Blocks(title="LLM Chatbot") as demo:
-    gr.Markdown("# 🤖 LLM Chatbot")
-    gr.Markdown("*Powered by a Hugging Face model*")
+with gr.Blocks(title="Advanced LLM Chatbot", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🤖 Advanced LLM Chatbot")
+    gr.Markdown("*Powered by Hugging Face models with model switching and streaming*")
     gr.Markdown("[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
 
-    chatbot = gr.Chatbot(type="messages", height=400)
+    with gr.Row():
+        model_dropdown = gr.Dropdown(
+            choices=list(AVAILABLE_MODELS.keys()),
+            value=DEFAULT_MODEL,
+            label="Select Model",
+            info="Choose a chat model to use"
+        )
+        switch_btn = gr.Button("Switch Model")
+        status = gr.Textbox(label="Status", interactive=False, value=f"Current: {DEFAULT_MODEL}")
+
+    with gr.Row():
+        streaming_toggle = gr.Checkbox(label="Enable Streaming", value=False)
+        save_btn = gr.Button("Save Chat")
+        load_btn = gr.Button("Load Chat")
+
+    chatbot = gr.Chatbot(type="messages", height=400, show_copy_button=True)
     msg = gr.Textbox(placeholder="Type your message here...", label="Message")
 
     with gr.Row():
-        submit_btn = gr.Button("Send")
+        submit_btn = gr.Button("Send", variant="primary")
         clear_btn = gr.ClearButton([msg, chatbot])
+        undo_btn = gr.Button("Undo Last")
 
-    def respond(message, chat_history):
+    def respond(message, chat_history, model_name, streaming):
         if not message.strip():
             return "", chat_history
 
-        response = chat_response(message, chat_history)
+        response = generate_response(message, chat_history, model_name, streaming)
         chat_history.append({"role": "user", "content": message})
         chat_history.append({"role": "assistant", "content": response})
         return "", chat_history
 
-    msg.submit(respond, [msg, chatbot], [msg, chatbot])
-    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
+    def undo_last(history):
+        if len(history) >= 2:
+            return history[:-2]  # Remove last user and assistant message
+        return history
+
+    def save_history(history):
+        save_chat_history(history)
+        return "Chat saved!"
+
+    def load_history():
+        return load_chat_history()
+
+    model_dropdown.change(switch_model, model_dropdown, status)
+    switch_btn.click(switch_model, model_dropdown, status)
+
+    msg.submit(respond, [msg, chatbot, model_dropdown, streaming_toggle], [msg, chatbot])
+    submit_btn.click(respond, [msg, chatbot, model_dropdown, streaming_toggle], [msg, chatbot])
+
+    undo_btn.click(undo_last, chatbot, chatbot)
+    save_btn.click(save_history, chatbot, status)
+    load_btn.click(load_history, None, chatbot)
 
 if __name__ == "__main__":
     demo.launch()
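
Note: as committed, `respond` threads the `streaming` flag through to `chat_with_model`, but the handler still returns a single finished string, so the "Enable Streaming" checkbox has no visible effect. Below is a minimal sketch of how the toggle could drive real token streaming, assuming transformers' `TextIteratorStreamer` and Gradio's generator-callback support; `stream_respond` is a hypothetical replacement handler, not part of this commit:

```python
from threading import Thread
from transformers import TextIteratorStreamer
from models import load_model          # from this commit's models.py
from utils import format_conversation  # from this commit's utils.py

def stream_respond(message, chat_history, model_name, streaming):
    # Hypothetical generator handler: Gradio re-renders outputs on each yield.
    if not message.strip():
        yield "", chat_history
        return

    model_data = load_model(model_name)
    tokenizer, model = model_data['tokenizer'], model_data['model']
    inputs = tokenizer(format_conversation(message, chat_history),
                       return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
                                    skip_special_tokens=True)
    # generate() blocks, so run it in a thread and consume the streamer here.
    Thread(target=model.generate,
           kwargs=dict(**inputs, max_new_tokens=100, streamer=streamer)).start()

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": ""})
    for chunk in streamer:
        chat_history[-1]["content"] += chunk
        yield "", chat_history
```

Binding this with `msg.submit(stream_respond, [msg, chatbot, model_dropdown, streaming_toggle], [msg, chatbot])` would update the chat incrementally as tokens arrive.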
config.py CHANGED
@@ -1,2 +1,9 @@
 # Configuration for the chatbot
-MODEL_NAME = "microsoft/DialoGPT-medium"  # Change to your preferred Hugging Face chat model
+AVAILABLE_MODELS = {
+    "microsoft/DialoGPT-medium": "DialoGPT Medium (Conversational)",
+    "microsoft/DialoGPT-large": "DialoGPT Large (Advanced)",
+    "facebook/blenderbot-400M-distill": "BlenderBot 400M (Conversational)",
+    "microsoft/GODEL-v1_1-base-seq2seq": "GODEL Base (Goal-oriented)"
+}
+
+DEFAULT_MODEL = "microsoft/DialoGPT-medium"
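
Aside: the dropdown in app.py is built from `list(AVAILABLE_MODELS.keys())`, so the human-readable labels stored as dict values never reach the UI. If showing them was the intent, `gr.Dropdown` also accepts `(label, value)` tuples; a small sketch under that assumption:

```python
import gradio as gr
from config import AVAILABLE_MODELS, DEFAULT_MODEL

# (label, value) pairs: the label is displayed, the repo id is what callbacks receive.
model_dropdown = gr.Dropdown(
    choices=[(label, repo_id) for repo_id, label in AVAILABLE_MODELS.items()],
    value=DEFAULT_MODEL,
    label="Select Model",
)
```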
models.py CHANGED
@@ -1,22 +1,48 @@
-from transformers import pipeline
-from utils import format_conversation
-from config import MODEL_NAME
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+import torch
 
-# Load the model pipeline once
-chat_pipeline = None
+# Cache for loaded models
+model_cache = {}
 
-def get_chat_pipeline():
-    global chat_pipeline
-    if chat_pipeline is None:
-        chat_pipeline = pipeline("conversational", model=MODEL_NAME)
-    return chat_pipeline
+def load_model(model_name):
+    """Load and cache a Hugging Face model."""
+    if model_name not in model_cache:
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,  # Use float16 for efficiency
+                device_map="auto"  # Auto-detect GPU
+            )
+            model_cache[model_name] = {
+                'name': model_name,
+                'tokenizer': tokenizer,
+                'model': model
+            }
+        except Exception as e:
+            raise ValueError(f"Failed to load model {model_name}: {str(e)}")
+
+    return model_cache[model_name]
 
-def chat_with_model(conversation):
-    """Send conversation to the Hugging Face model and get response."""
+def chat_with_model(model_data, conversation, streaming=False):
+    """Generate response using the loaded model."""
     try:
-        pipe = get_chat_pipeline()
-        formatted_input = format_conversation(conversation)
-        response = pipe(formatted_input)
-        return response[0]['generated_text']
+        tokenizer = model_data['tokenizer']
+        model = model_data['model']
+
+        inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
+
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_length=inputs['input_ids'].shape[1] + 100,  # Generate up to 100 new tokens
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
+        response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+        return response.strip()
     except Exception as e:
+        return f"Error generating response: {str(e)}"
requirements.txt CHANGED
@@ -1,4 +1,6 @@
 gradio>=4.0.0
-transformers>=4.0.0
+transformers>=4.21.0
 torch>=2.0.0
-accelerate>=0.20.0
+accelerate>=0.20.0
+spaces>=0.25.0  # For GPU acceleration on Spaces
+sentencepiece>=0.1.99  # Required for some tokenizers
utils.py CHANGED
@@ -1,3 +1,22 @@
-def format_conversation(conversation):
+import json
+import os
+
+def format_conversation(message, history):
     """Format conversation for the model."""
-    return "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation])
+    conversation = []
+    for msg in history:
+        conversation.append(f"{msg['role']}: {msg['content']}")
+    conversation.append(f"user: {message}")
+    return "\n".join(conversation)
+
+def save_chat_history(history):
+    """Save chat history to a file."""
+    with open("chat_history.json", "w") as f:
+        json.dump(history, f)
+
+def load_chat_history():
+    """Load chat history from a file."""
+    if os.path.exists("chat_history.json"):
+        with open("chat_history.json", "r") as f:
+            return json.load(f)
+    return []
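
One design note: `chat_history.json` is a single file in the Space's working directory, so every visitor shares (and overwrites) the same saved chat. A sketch of per-session persistence, assuming the caller supplies a session identifier (for example, `gr.Request.session_hash`); `session_id` and these helper variants are hypothetical, not part of this commit:

```python
import json
import os

def save_chat_history(history, session_id):
    """Persist one session's history under its own file."""
    with open(f"chat_history_{session_id}.json", "w") as f:
        json.dump(history, f)

def load_chat_history(session_id):
    """Load only the calling session's history, if it exists."""
    path = f"chat_history_{session_id}.json"
    if os.path.exists(path):
        with open(path, "r") as f:
            return json.load(f)
    return []
```

Also note that Space storage is ephemeral unless persistent storage is enabled, so saved chats vanish on restart either way.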