ykvns committed
Commit 584fe01 · verified · 1 Parent(s): 6e1df7d

Update app.py

Files changed (1)
  1. app.py +25 -37
app.py CHANGED
@@ -3,71 +3,59 @@ import torch
 from threading import Thread
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
-# Model and Tokenizer Configuration
+# Model and tokenizer config
 MODEL_REPO_ID = "LGAI-EXAONE/EXAONE-4.0-1.2B"
 
 print("✅ Starting application...")
 
-# Load the model with bfloat16 to save memory
 try:
     print(f"🔄 Loading tokenizer from '{MODEL_REPO_ID}'...")
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO_ID)
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_REPO_ID,
+        trust_remote_code=True
+    )
     print("✅ Tokenizer loaded successfully.")
 
     print(f"🔄 Loading model '{MODEL_REPO_ID}' with torch_dtype=torch.bfloat16...")
-
+
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_REPO_ID,
        torch_dtype=torch.bfloat16,
-        device_map="auto"
+        device_map="auto",
+        trust_remote_code=True
     )
     print("✅ Model loaded successfully.")
 
 except Exception as e:
     print(f"❌ Error loading model or tokenizer: {e}")
-    # Exit if model fails to load, as the app is unusable.
     raise
 
-# Streaming Chat Function
+# Streaming chat function
 def user_input_handler(user_message, history):
-    """Handles user input by appending it to the history."""
     return "", history + [[user_message, None]]
 
 def bot_stream(history):
-    """
-    Generates the bot's response using a streaming approach.
-    This function runs the model in a separate thread to avoid blocking the UI.
-    """
-    print(f"📝 History received: {history}")
-    # The last message is the user's prompt.
     user_message = history[-1][0]
-    history[-1][1] = ""  # Initialize the bot's response field.
+    history[-1][1] = ""  # Initialize the bot's response field
 
-    # Format the conversation history into the model's expected chat format.
+    # Format the conversation history into the model's expected chat format
     messages = []
-    for human, assistant in history[:-1]:  # All but the last interaction
+    for human, assistant in history[:-1]:
         messages.append({"role": "user", "content": human})
-        if assistant:  # Assistant message might be None
+        if assistant:
             messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": user_message})
 
-    try:
-        # Apply the chat template to format the prompt.
-        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    except Exception as e:
-        print(f"⚠️ Warning: Could not apply chat template. Using basic formatting. Error: {e}")
-        # Fallback for models without a registered chat template
-        prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages]) + "\nassistant:"
-
-    print("➡️ Generated Prompt for Model:\n" + prompt)
-
-    # Tokenize the formatted prompt.
+    # Apply the chat template to format the prompt correctly
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    # Tokenize the formatted prompt
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-    # Use TextIteratorStreamer for non-blocking, token-by-token generation.
+    # Use TextIteratorStreamer for non-blocking, token-by-token generation
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
-    # Set up the generation parameters in a dictionary.
+    # Set up the generation parameters in a dictionary
     generation_kwargs = dict(
         inputs,
         streamer=streamer,
@@ -78,19 +66,19 @@ def bot_stream(history):
         eos_token_id=tokenizer.eos_token_id,
     )
 
-    # Run the generation in a separate thread to avoid blocking the Gradio UI.
+    # Run the generation in a separate thread
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
-    # Yield each new token to the Gradio chat interface as it's generated.
+    # Yield each new token to the Gradio chat interface as it's generated
     for token in streamer:
         history[-1][1] += token
         yield history
 
-# Gradio User Interface
+# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="footer {display: none !important}") as demo:
     gr.Markdown("## 🤖 EXAONE-4.0-1.2B")
-    gr.Markdown("This demo runs the standard `LGAI-EXAONE/EXAONE-4.0-1.2B` model using the `transformers` library.")
+    gr.Markdown("This demo runs `LGAI-EXAONE/EXAONE-4.0-1.2B` model")
 
     chatbot = gr.Chatbot(label="Chat History", height=600, bubble_full_width=False)
     with gr.Row():
@@ -104,7 +92,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="footer {display: none !important}") as demo:
 
     clear_btn = gr.ClearButton([msg, chatbot], value="🗑️ Clear Chat")
 
-    # Event Handlers
+    # Event Handlers - when a message is submitted
     msg.submit(user_input_handler, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot_stream, chatbot, chatbot
     )
@@ -113,4 +101,4 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="footer {display: none !important}") as demo:
     )
 
 demo.queue()
-demo.launch(debug=True)
+demo.launch(debug=True)
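
For reference, a minimal standalone sketch of the loading and streaming pattern the updated app.py relies on. It is not part of the commit; the sample prompt and the max_new_tokens value are illustrative assumptions, and it presumes the same torch/transformers stack the Space uses.

# Sketch only: mirrors app.py's updated load path (trust_remote_code + bfloat16 + device_map)
# and its threaded TextIteratorStreamer generation. Prompt text and max_new_tokens are
# illustrative, not taken from app.py.
import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

repo_id = "LGAI-EXAONE/EXAONE-4.0-1.2B"

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Build a single-turn prompt with the model's registered chat template.
messages = [{"role": "user", "content": "Say hello in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate in a background thread and read decoded text from the streamer,
# the same pattern bot_stream() uses to feed the Gradio chatbot.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=64)).start()
print("".join(streamer))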