Update app.py
app.py CHANGED

@@ -4,6 +4,7 @@ import threading
 import time
 import subprocess
 import spaces
+import asyncio

 OLLAMA = os.path.expanduser("~/ollama")
 process = None
@@ -100,7 +101,7 @@ def launch():
     print("Giving ollama serve a moment")
     time.sleep(10)

-def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f"message: {message}")
     conversation = []
     for prompt, answer in history:
@@ -114,6 +115,7 @@ def stream_chat(message: str, history: list, model: str, temperature: float, max

     response = client.chat(
         model=model,
+        stream=True,
         messages=conversation,
         keep_alive="60s",
         options={
@@ -127,8 +129,11 @@ def stream_chat(message: str, history: list, model: str, temperature: float, max
     )

     print(response)
-
-
+
+    buffer = ""
+    for chunk in response:
+        buffer += chunk["message"]["content"]
+        yield buffer


 def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
@@ -141,8 +146,8 @@ def main(message: str, history: list, model: str, temperature: float, max_new_to
     else:
         if not process:
             launch()
-
-        response = stream_chat(
+
+        response = await stream_chat(
             message,
             history,
             model,
@@ -152,8 +157,7 @@ def main(message: str, history: list, model: str, temperature: float, max_new_to
             top_k,
             penalty
         )
-
-        terminate()
+
         yield response

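For context on what the commit does: passing stream=True makes the ollama Python client return an iterator of partial responses instead of one final message, and the new loop re-yields a growing buffer so a Gradio UI can render text as it arrives. Below is a minimal standalone sketch of that pattern, assuming the ollama package and a locally running ollama serve; the model tag and prompt are illustrative placeholders, not taken from the commit.

import ollama

client = ollama.Client()  # defaults to the local ollama serve endpoint

def stream_reply(prompt: str):
    # stream=True turns the reply into an iterator of incremental chunks
    response = client.chat(
        model="llama3",  # placeholder model tag
        messages=[{"role": "user", "content": prompt}],
        stream=True,
        keep_alive="60s",
    )
    buffer = ""
    for chunk in response:
        # each chunk carries the next slice of the assistant's message
        buffer += chunk["message"]["content"]
        yield buffer  # yield the running text, as the commit's loop does

for partial in stream_reply("Why is the sky blue?"):
    print(partial)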
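The commit also turns stream_chat into an async def generator alongside the new asyncio import. One note for anyone adapting this pattern: an async generator is driven with async for rather than a bare await. A self-contained sketch of that consumption style follows; the generator here is a stand-in for illustration, not the Space's code.

import asyncio

async def fake_stream(message: str):
    # stand-in for an async stream_chat; yields a growing buffer
    # like the commit's loop does
    buffer = ""
    for piece in ["Hel", "lo, ", "world!"]:
        await asyncio.sleep(0)  # cede control, as real network I/O would
        buffer += piece
        yield buffer

async def consume() -> None:
    # drive the async generator with `async for`, not `await fake_stream(...)`
    async for partial in fake_stream("hi"):
        print(partial)

asyncio.run(consume())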