Update app.py
app.py CHANGED

@@ -33,9 +33,11 @@ if not torch.cuda.is_available():
 if torch.cuda.is_available():
     # model_id = "meta-llama/Llama-2-7b-chat-hf"
     model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", torch_dtype=torch.bfloat16)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
+    print(f"{model}")
+    print(f"{model.device}")


 @spaces.GPU
@@ -50,8 +52,8 @@ def generate(
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
-    if system_prompt:
-        conversation.append({"role": "system", "content": system_prompt})
+    # if system_prompt:
+    #     conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
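For context, the first hunk loads the checkpoint entirely onto the GPU in bfloat16 and adds two debug prints. Below is a minimal standalone sketch of that loading pattern; the CPU fallback branch is an illustrative assumption and not part of this Space's code.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

if torch.cuda.is_available():
    # bfloat16 halves memory versus float32, and device_map="cuda" places
    # all weights on the single GPU rather than letting accelerate shard them.
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="cuda", torch_dtype=torch.bfloat16
    )
else:
    # Assumed fallback for local testing without a GPU (float32 on CPU).
    model = AutoModelForCausalLM.from_pretrained(model_id)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

print(model.device)  # the added debug prints should report cuda:0 on the Space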
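The second hunk comments out the system-prompt handling when building the conversation list. The diff does not show how conversation is consumed downstream; the sketch below assumes the usual tokenizer.apply_chat_template pattern, and the conversation turns are hypothetical. Mistral-7B-Instruct-v0.2's chat template accepts only alternating user/assistant roles, so a leading system entry raises a template error, which is likely why those two lines were disabled.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# Hypothetical history in the same shape generate() builds from chat_history.
conversation = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi! How can I help?"},
    {"role": "user", "content": "Tell me a joke."},
]

# apply_chat_template wraps each turn in Mistral's [INST] ... [/INST] markers.
# Prepending {"role": "system", ...} here would make this template raise.
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
print(tokenizer.decode(input_ids[0]))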