Update app.py
app.py CHANGED

@@ -33,9 +33,11 @@ if not torch.cuda.is_available():
 if torch.cuda.is_available():
     # model_id = "meta-llama/Llama-2-7b-chat-hf"
     model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", torch_dtype=torch.bfloat16)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
+    print(f"{model}")
+    print(f"{model.device}")


 @spaces.GPU
@@ -50,8 +52,8 @@ def generate(
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
-    if system_prompt:
-        conversation.append({"role": "system", "content": system_prompt})
+    # if system_prompt:
+    #     conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
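For context, the first hunk loads the checkpoint entirely onto the GPU in bfloat16 and adds two debug prints. Below is a minimal standalone sketch of that loading pattern; the CPU fallback branch is an illustrative assumption and not part of this Space's code.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

if torch.cuda.is_available():
    # bfloat16 halves memory versus float32, and device_map="cuda" places
    # all weights on the single GPU rather than letting accelerate shard them.
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="cuda", torch_dtype=torch.bfloat16
    )
else:
    # Assumed fallback for local testing without a GPU (float32 on CPU).
    model = AutoModelForCausalLM.from_pretrained(model_id)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

print(model.device)  # the added debug prints should report cuda:0 on the Space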
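The second hunk comments out the system-prompt handling when building the conversation list. The diff does not show how conversation is consumed downstream; the sketch below assumes the usual tokenizer.apply_chat_template pattern, and the conversation turns are hypothetical. Mistral-7B-Instruct-v0.2's chat template accepts only alternating user/assistant roles, so a leading system entry raises a template error, which is likely why those two lines were disabled.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# Hypothetical history in the same shape generate() builds from chat_history.
conversation = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi! How can I help?"},
    {"role": "user", "content": "Tell me a joke."},
]

# apply_chat_template wraps each turn in Mistral's [INST] ... [/INST] markers.
# Prepending {"role": "system", ...} here would make this template raise.
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
print(tokenizer.decode(input_ids[0]))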