Update serenityai.py
serenityai.py  +9 -84  CHANGED
@@ -7,6 +7,7 @@ Original file is located at
 https://colab.research.google.com/drive/1LV3l6IWVK64-7RI2C7wEiW9r7ghx9d-o
 """
 # %% Cell 1 - Model Initialization with Checkpoint Saving
+# %% Cell 1 - Model Initialization with Checkpoint Saving
 import torch
 from unsloth import FastLanguageModel
 import os
@@ -14,24 +15,18 @@ import os
 # Configuration
 model_name = "unsloth/llama-3-8B-bnb-4bit"
 max_seq_length = 2048
-dtype = torch.float16
+dtype = torch.float32  # ✅ Change to float32 for CPU
 checkpoint_dir = "./serenity_checkpoints/initial_checkpoint"
 os.makedirs(checkpoint_dir, exist_ok=True)
 
-#
-print(f"Available GPUs: {torch.cuda.device_count()}")
-print(f"CUDA version: {torch.version.cuda}")
-torch.cuda.empty_cache()
-
-# Load model with optimized configuration
+# Load model with optimized configuration for CPU
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=max_seq_length,
     dtype=dtype,
-    load_in_4bit=True,
-    device_map="auto",
-    rope_scaling={"type": "dynamic", "factor": 2.0},
-    attn_implementation="flash_attention_2",
+    load_in_4bit=False,  # ✅ Disable 4-bit quantization for CPU
+    device_map="cpu",  # ✅ Force CPU usage
+    rope_scaling={"type": "dynamic", "factor": 2.0},
 )
 
 # Apply LoRA configuration
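Note: this hunk hardcodes a CPU-only configuration (float32, 4-bit off, device_map="cpu"). A minimal sketch of a device-aware alternative, not part of this commit, assuming the same FastLanguageModel.from_pretrained parameters already used in this file:

import torch
from unsloth import FastLanguageModel

# Pick settings from the hardware that is actually present.
use_cuda = torch.cuda.is_available()
dtype = torch.float16 if use_cuda else torch.float32   # float32 is the safe CPU default
load_in_4bit = use_cuda                                # bitsandbytes 4-bit quantization needs a GPU

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8B-bnb-4bit",
    max_seq_length=2048,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    device_map="auto" if use_cuda else "cpu",
)

On a CPU-only Space this resolves to exactly the values added in the hunk above.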
@@ -91,8 +86,8 @@ def load_from_checkpoint(checkpoint_path):
         model_name=checkpoint_path,
         max_seq_length=max_seq_length,
         dtype=dtype,
-        load_in_4bit=True,
-        device_map="auto",
+        load_in_4bit=False,  # ✅ Ensure 4-bit is off for CPU
+        device_map="cpu",
     )
 
 # Test loading
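The function being patched here is not shown in full; the inference code in the next hunk uses loaded_model and loaded_tokenizer, so load_from_checkpoint presumably returns both. A hypothetical usage sketch under that assumption:

# Assumed return signature; the full load_from_checkpoint body is outside this diff.
loaded_model, loaded_tokenizer = load_from_checkpoint(checkpoint_dir)
print("Checkpoint loaded successfully!")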
@@ -101,79 +96,9 @@ print("Checkpoint loaded successfully!")
 
 # Example inference
 prompt = "User: How can I preserve my mental energy throughout the day?\nAI:"
-inputs = loaded_tokenizer(prompt, return_tensors="pt").to("cuda")
+inputs = loaded_tokenizer(prompt, return_tensors="pt").to("cpu")  # ✅ Move to CPU
 outputs = loaded_model.generate(**inputs, max_new_tokens=100)
 print(loaded_tokenizer.decode(outputs[0], skip_special_tokens=True))
-# %% Cell 6 - Validation and Testing
-# %% Fixing Tokenizer and Special Tokens Handling
-from unsloth import FastLanguageModel
-from transformers import AddedToken, AutoTokenizer
-import torch
-
-
-# Define Llama-3 chat template
-LLAMA3_CHAT_TEMPLATE = """
-{% for message in messages %}
-{% if message['role'] == 'system' %}
-<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>
-{% elif message['role'] == 'user' %}
-<|start_header_id|>user<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>
-{% elif message['role'] == 'assistant' %}
-<|start_header_id|>assistant<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>
-{% endif %}
-{% endfor %}
-<|start_header_id|>assistant<|end_header_id|>\n\n
-"""
-
-# Initialize tokenizer with proper template
-tokenizer = AutoTokenizer.from_pretrained(
-    "unsloth/llama-3-8B-bnb-4bit",
-    padding_side="right",
-    truncation_side="right",
-    pad_token="<|end_of_text|>",
-    additional_special_tokens=[
-        "<|begin_of_text|>",
-        "<|start_header_id|>",
-        "<|end_header_id|>",
-        "<|eot_id|>",
-    ],
-    tokenizer_type="llama",
-    use_fast=True,
-)
-
-# Set the chat template explicitly
-tokenizer.chat_template = LLAMA3_CHAT_TEMPLATE
-
-# Initialize model
-model, _ = FastLanguageModel.from_pretrained(
-    model_name="unsloth/llama-3-8B-bnb-4bit",
-    max_seq_length=2048,
-    dtype=torch.float16,
-    load_in_4bit=True,
-    device_map="auto",
-)
-
-# Align model config with tokenizer
-model.config.pad_token_id = tokenizer.pad_token_id
-model.config.eos_token_id = tokenizer.eos_token_id
-model.config.bos_token_id = tokenizer.bos_token_id
-
-# Verify chat template
-print("Chat template configured:", tokenizer.chat_template is not None)
-
-# Example usage
-messages = [
-    {"role": "system", "content": "You are Serenity AI..."},
-    {"role": "user", "content": "I'm feeling anxious..."}
-]
-
-formatted_prompt = tokenizer.apply_chat_template(
-    messages,
-    tokenize=True,
-    add_generation_prompt=True
-)
-print("Formatted prompt:\n", formatted_prompt)
-
 
 import os
 import json
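Generating with an 8B model on CPU is slow; a small sketch, not part of this commit, that runs the same generate() call under inference mode and makes the thread count explicit:

import os
import torch

torch.set_num_threads(os.cpu_count() or 1)  # make the CPU thread count explicit; tune for the host

with torch.inference_mode():  # no gradient state is needed for generation
    outputs = loaded_model.generate(**inputs, max_new_tokens=100)
print(loaded_tokenizer.decode(outputs[0], skip_special_tokens=True))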