Update app.py
app.py CHANGED
@@ -60,7 +60,7 @@ accelerator = Accelerator()
 model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
     # torch_dtype= torch.uint8,
     torch_dtype=torch.float16,
-
+    load_in_4bit=True,
     # # # torch_dtype=torch.fl,
     attn_implementation="flash_attention_2",
     low_cpu_mem_usage=True,
@@ -96,6 +96,8 @@ def respond(
     temperature,
     top_p,
 ):
+    # model.to(accelerator.device)
+
     messages = []
     json_obj = str_to_json(message)
     print(json_obj)
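
For context, a minimal sketch of what the patched loading call amounts to after this commit. It assumes model_id and token are defined earlier in app.py, as the hunk context suggests, and keeps only the active (non-commented) arguments from the diff:

    # Sketch only: standalone equivalent of the patched from_pretrained call.
    # model_id and token are assumed to be defined elsewhere in app.py.
    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=token,
        torch_dtype=torch.float16,
        load_in_4bit=True,                        # added in this commit: 4-bit quantization via bitsandbytes
        attn_implementation="flash_attention_2",  # requires flash-attn to be installed
        low_cpu_mem_usage=True,
    )

With load_in_4bit=True, the quantized weights are dispatched to the GPU at load time by the bitsandbytes backend, and .to() is not supported on such models, which is presumably why the model.to(accelerator.device) line added in respond() is left commented out.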