Keeby-smilyai committed
Commit 5c47c84 · verified · Parent: f49b7f0

Update app.py

Files changed (1): app.py (+12 -5)
app.py CHANGED
@@ -517,6 +517,14 @@ class ModelWrapper:
             token_text = self.tokenizer.decode([token_id])
             response_text += token_text
             yield response_text
+
+    def generate(self, prompt: str, max_new_tokens: int = 200,
+                 temperature: float = 0.8, top_k: int = 50, top_p: float = 0.9):
+        """Non-streaming generation (returns full response)"""
+        response = ""
+        for partial_response in self.generate_stream(prompt, max_new_tokens, temperature, top_k, top_p):
+            response = partial_response
+        return response
 
 # ==============================================================================
 # GRADIO INTERFACE
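
The new generate() is a thin convenience wrapper: generate_stream() yields the cumulative response_text after every token (not per-token deltas), so keeping only the last yielded value gives the complete reply. A minimal, self-contained sketch of that pattern; fake_stream is a hypothetical stand-in for the real method, for illustration only:

def fake_stream(prompt: str):
    # Hypothetical stand-in for ModelWrapper.generate_stream: like the
    # method above, it yields the text accumulated so far, not deltas.
    text = ""
    for token in ["Hel", "lo", " there", "!"]:
        text += token
        yield text

# Draining the stream and keeping the last value is exactly what the
# new non-streaming generate() does.
response = ""
for partial in fake_stream("User: hi Sam:"):
    response = partial
assert response == "Hello there!"
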
@@ -545,16 +553,15 @@ def chat_fn(message, history, temperature, top_k, top_p, max_tokens):
     # Add current message
     conversation += f"User: {message} Sam:"
 
-    # Generate response
-    response = model.generate(
+    # Stream response token by token
+    for response in model.generate_stream(
         conversation,
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p
-    )
-
-    return response
+    ):
+        yield response
 
 # Create Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
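
Because chat_fn is now a generator, Gradio re-renders the pending chat message on every yield, which is what produces token-by-token streaming in the UI. The commit's own layout uses gr.Blocks (shown above); the sketch below is an illustrative alternative using gr.ChatInterface, with assumed slider ranges and defaults that are not taken from app.py:

import gradio as gr

# Sketch only: ChatInterface passes (message, history) plus each
# additional_inputs value to chat_fn, matching its signature in the diff.
# Because chat_fn is a generator, the reply streams into the chat window.
demo = gr.ChatInterface(
    fn=chat_fn,
    additional_inputs=[
        gr.Slider(0.1, 2.0, value=0.8, label="temperature"),
        gr.Slider(1, 200, value=50, step=1, label="top_k"),
        gr.Slider(0.1, 1.0, value=0.9, label="top_p"),
        gr.Slider(16, 1024, value=200, step=16, label="max_tokens"),
    ],
    theme=gr.themes.Soft(),
)
demo.launch()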