Update vlm.py
vlm.py CHANGED
@@ -127,7 +127,11 @@ def build_messages(input_dict: dict, history: list[tuple]):
 #
 @spaces.GPU
 @torch.inference_mode()
-def stream_response(messages: list[dict]):
+def stream_response(
+    messages: list[dict],
+    max_new_tokens: int=1_024,
+    temperature: float=0.15
+):
     """Stream the model's response to the chat interface.
 
     Args:
@@ -148,7 +152,9 @@ def stream_response(messages: list[dict]):
     generation_args = dict(
         inputs,
         streamer=streamer,
-        max_new_tokens=
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=0.9,
         do_sample=True
     )
 
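The change makes max_new_tokens and temperature configurable per call instead of hardcoded, and adds top_p=0.9 to the generation arguments. Below is a minimal, self-contained sketch of the streaming pattern the diff implies: a TextIteratorStreamer drained while model.generate runs in a background thread, with the new keyword arguments flowing into generation_args. Everything outside the generation_args block is an illustrative assumption, not code from vlm.py — the placeholder checkpoint, the text-only tokenizer setup, and the yield loop stand in for the Space's actual vision-language model, processor, and @spaces.GPU-decorated handler.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder checkpoint for illustration; vlm.py's actual model is not shown in this diff.
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

def stream_response(messages: list[dict], max_new_tokens: int = 1_024, temperature: float = 0.15):
    """Yield the growing response string, chunk by chunk."""
    # Turn the chat history into model inputs (a dict of tensors).
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
    )
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_args = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=0.9,
        do_sample=True,
    )
    # generate() blocks until finished, so run it in a thread and drain the streamer here.
    Thread(target=model.generate, kwargs=generation_args).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text

# Usage: the defaults mirror the new signature; callers can now override them per request.
for partial in stream_response([{"role": "user", "content": "Hello!"}], max_new_tokens=64):
    print(partial)

Yielding the cumulative text (rather than individual chunks) matches the convention Gradio chat interfaces expect for streaming responses, which is presumably why the function streams this way.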