update code for real streaming
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import requests
|
|
| 7 |
#Streaming endpoint
|
| 8 |
API_URL = os.getenv("API_URL") + "/generate_stream"
|
| 9 |
|
| 10 |
-
def
|
| 11 |
if not inputs.startswith("User: "):
|
| 12 |
inputs = "User: " + inputs + "\n"
|
| 13 |
payload = {
|
|
@@ -48,6 +48,50 @@ def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
|
|
| 48 |
|
| 49 |
yield chat, history #resembles {chatbot: chat, state: history}
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
|
| 52 |
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|
| 53 |
```
|
|
|
|
| 7 |
#Streaming endpoint
|
| 8 |
API_URL = os.getenv("API_URL") + "/generate_stream"
|
| 9 |
|
| 10 |
+
def predict_old(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
|
| 11 |
if not inputs.startswith("User: "):
|
| 12 |
inputs = "User: " + inputs + "\n"
|
| 13 |
payload = {
|
|
|
|
| 48 |
|
| 49 |
yield chat, history #resembles {chatbot: chat, state: history}
|
| 50 |
|
| 51 |
def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=None):
    """Stream a chatbot response from the text-generation inference endpoint.

    Posts the prompt to the streaming API and yields the growing
    conversation after every received token, so the Gradio UI can render
    partial output in real time.

    Args:
        inputs: User prompt; "User: " is prepended if missing.
        top_p: Nucleus-sampling probability mass.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Penalty applied to repeated tokens.
        history: Flat list of alternating user/bot turns
            [user, bot, user, bot, ...]; a fresh list is used when omitted.

    Yields:
        (chat, history): chat is a list of (user, bot) tuples for the
        Gradio Chatbot component; history is the updated flat list.
    """
    # Fix the shared-mutable-default pitfall: the original `history=[]`
    # leaked conversation state across calls that relied on the default.
    if history is None:
        history = []

    if not inputs.startswith("User: "):
        inputs = "User: " + inputs + "\n"

    payload = {
        "inputs": inputs,
        "parameters": {
            "details": True,
            "do_sample": True,
            "max_new_tokens": 100,
            "repetition_penalty": repetition_penalty,  # e.g. 1.03
            "seed": 0,
            "temperature": temperature,  # e.g. 0.5
            "top_k": top_k,  # e.g. 10
            "top_p": top_p,  # e.g. 0.95
        },
    }

    headers = {
        'accept': 'text/event-stream',
        'Content-Type': 'application/json'
    }

    history.append(inputs)
    # NOTE(review): the module header defines API_URL, but this posts to
    # API_URL2 — presumably defined elsewhere in the file; confirm.
    response = requests.post(API_URL2, headers=headers, json=payload, stream=True)

    token_counter = 0
    partial_words = ""
    for chunk in response.iter_lines():
        if not chunk:
            continue  # skip keep-alive / blank SSE lines
        # Each SSE line looks like b"data:{...json...}"; strip the
        # 5-character "data:" prefix before parsing the token payload.
        partial_words += json.loads(chunk.decode()[5:])['token']['text']
        time.sleep(0.05)  # small delay smooths the visible streaming
        if token_counter == 0:
            # First token of this turn: open a new bot entry in history.
            history.append(" " + partial_words)
        else:
            # Subsequent tokens: replace the in-progress bot entry.
            history[-1] = partial_words
        # Pair the flat history into (user, bot) tuples for the Chatbot UI.
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history  # resembles {chatbot: chat, state: history}
|
| 95 |
title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
|
| 96 |
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|
| 97 |
```
|