Update app.py
app.py CHANGED
@@ -60,7 +60,7 @@ accelerator = Accelerator()
 model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
     # torch_dtype= torch.uint8,
     torch_dtype=torch.float16,
-
+    load_in_4bit=True,
     # # # torch_dtype=torch.fl,
     attn_implementation="flash_attention_2",
     low_cpu_mem_usage=True,
@@ -96,6 +96,8 @@ def respond(
     temperature,
     top_p,
 ):
+    # model.to(accelerator.device)
+
     messages = []
     json_obj = str_to_json(message)
     print(json_obj)
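
For context, a minimal sketch of what the patched loading call amounts to after this commit. It assumes model_id and token are defined earlier in app.py, as the hunk context suggests, and keeps only the active (non-commented) arguments from the diff:

    # Sketch only: standalone equivalent of the patched from_pretrained call.
    # model_id and token are assumed to be defined elsewhere in app.py.
    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=token,
        torch_dtype=torch.float16,
        load_in_4bit=True,                        # added in this commit: 4-bit quantization via bitsandbytes
        attn_implementation="flash_attention_2",  # requires flash-attn to be installed
        low_cpu_mem_usage=True,
    )

With load_in_4bit=True, the quantized weights are dispatched to the GPU at load time by the bitsandbytes backend, and .to() is not supported on such models, which is presumably why the model.to(accelerator.device) line added in respond() is left commented out.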