Wasim0606 committed
Commit ee8aa18 · verified · 1 Parent(s): 77ec0a1

Update serenityai.py

Files changed (1):
  1. serenityai.py +0 -229
serenityai.py CHANGED
@@ -8,243 +8,14 @@ Original file is located at
 """
 
 # %% Cell 1 - Install Dependencies
-!pip install -q torch==2.2.1 torchvision torchaudio xformers
-!pip install -q fastapi nest-asyncio pyngrok uvicorn python-multipart streamlit langchain chromadb pydub whisper openai-whisper
 
-!pip install -q torch==2.2.1 torchvision torchaudio xformers
 
-!pip install --upgrade unsloth
 
-!pip install optuna
 
 # %% Cell 1 - Setup and Configuration
 import os
 import torch
-import optuna # Hyperparameter tuning
-from datasets import load_dataset
-from unsloth import FastLanguageModel
-from transformers import TrainingArguments, Trainer
-import wandb
-
-# Initialize wandb
-wandb.init(project="serenity-ai-advanced")
-
-# Hardware configuration
-print(f"Available GPUs: {torch.cuda.device_count()}")
-print(f"CUDA version: {torch.version.cuda}")
-torch.cuda.empty_cache()
-
-# Model configuration
-model_name = "unsloth/llama-3-8B-bnb-4bit"
-max_seq_length = 2048 # Max for Llama-3
-dtype = torch.float16
-checkpoint_dir = "./serenity-advanced-checkpoints"
-
-# %% Cell 2 - Dataset Processing & Tokenization
-def format_chat_template(example):
-    """
-    Formats the dataset into a chat-like structure.
-    Uses 'utterance' and a placeholder for response
-    since the dataset doesn't have a dedicated 'response' column.
-    """
-    return {
-        "text": f"User: {example['utterance']}\nAI: [Placeholder Response]" # Placeholder response for now
-    }
-
-
-# Load and process dataset
-dataset = load_dataset("empathetic_dialogues", split="train[:2000]") # First 2000 samples
-dataset = dataset.map(format_chat_template, remove_columns=dataset.column_names)
-
-# Split dataset
-split_dataset = dataset.train_test_split(test_size=0.1)
-train_dataset = split_dataset["train"]
-eval_dataset = split_dataset["test"]
-
-# Tokenization function
-def tokenize_function(examples):
-    return tokenizer(
-        examples["text"],
-        padding="max_length",
-        truncation=True,
-        max_length=max_seq_length,
-        return_tensors="pt",
-    )
-
-# Apply tokenization
-tokenized_train = train_dataset.map(
-    tokenize_function,
-    batched=True,
-    remove_columns=["text"]
-)
-
-tokenized_eval = eval_dataset.map(
-    tokenize_function,
-    batched=True,
-    remove_columns=["text"]
-)
-
-# Create data collator
-data_collator = DataCollatorForLanguageModeling(
-    tokenizer=tokenizer,
-    mlm=False
-)
-
-# %% Cell 3 - Resume Training from Checkpoint if Available
-latest_checkpoint = None
-if os.path.exists(checkpoint_dir):
-    checkpoints = sorted([ckpt for ckpt in os.listdir(checkpoint_dir) if ckpt.startswith("checkpoint-")])
-    if checkpoints:
-        latest_checkpoint = os.path.join(checkpoint_dir, checkpoints[-1])
-        print(f"Resuming training from checkpoint: {latest_checkpoint}")
-    else:
-        print("No checkpoint found, starting fresh.")
-
-# %% Cell 4 - Define Hyperparameter Search Function
-def hyperparameter_objective(trial):
-    """Optuna function to find the best hyperparameters."""
-    # Hyperparameter search space
-    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True)
-    batch_size = trial.suggest_categorical("batch_size", [2, 4, 8])
-    weight_decay = trial.suggest_float("weight_decay", 0.01, 0.1)
-    lora_rank = trial.suggest_int("lora_rank", 16, 64, step=16)
-    lora_dropout = trial.suggest_float("lora_dropout", 0.05, 0.2)
-
-    # %% Load Model with Optimized LoRA
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name=model_name,
-        max_seq_length=max_seq_length,
-        dtype=dtype,
-        load_in_4bit=True,
-        token=None,
-        device_map="auto",
-        rope_scaling={"type": "dynamic", "factor": 2.0},
-        attn_implementation="flash_attention_2",
-    )
-
-    model = FastLanguageModel.get_peft_model(
-        model,
-        r=lora_rank,
-        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
-                        "gate_proj", "up_proj", "down_proj"],
-        lora_alpha=64,
-        lora_dropout=lora_dropout,
-        bias="none",
-        use_gradient_checkpointing="unsloth",
-        random_state=3407,
-        max_seq_length=max_seq_length,
-        use_rslora=True,
-        loftq_config={},
-    )
-
-    # Training Arguments
-    training_args = TrainingArguments(
-        output_dir=checkpoint_dir,
-        num_train_epochs=2,
-        per_device_train_batch_size=batch_size,
-        gradient_accumulation_steps=4,
-        learning_rate=learning_rate,
-        weight_decay=weight_decay,
-        warmup_ratio=0.1,
-        lr_scheduler_type="cosine",
-        evaluation_strategy="steps",
-        eval_steps=5000,
-        logging_steps=10,
-        fp16=torch.cuda.is_available(),
-        seed=42,
-        report_to="wandb",
-        save_strategy="steps",
-        save_steps=5000,
-        save_total_limit=3,
-        load_best_model_at_end=True,
-        gradient_checkpointing=True,
-        push_to_hub=False,
-    )
-
-    #trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=tokenized_train,
-        eval_dataset=tokenized_eval,
-        data_collator=data_collator,
-        tokenizer=tokenizer,
-    )
-
-    #trainer.train(resume_from_checkpoint=latest_checkpoint)
-
-    # Return validation loss for optimization
-    eval_results = trainer.evaluate()
-    return eval_results["eval_loss"]
-
-# %% Cell 5 - Run Optuna Hyperparameter Optimization
-study = optuna.create_study(direction="minimize")
-study.optimize(hyperparameter_objective, n_trials=10) # Run 10 trials
-
-# %% Cell 6 - Train with Best Hyperparameters
-best_params = study.best_params
-print(f"Best Hyperparameters: {best_params}")
-
-# Load best model
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=model_name,
-    max_seq_length=max_seq_length,
-    dtype=dtype,
-    load_in_4bit=True,
-    token=None,
-    device_map="auto",
-    rope_scaling={"type": "dynamic", "factor": 2.0},
-    attn_implementation="flash_attention_2",
-)
-
-model = FastLanguageModel.get_peft_model(
-    model,
-    r=best_params["lora_rank"],
-    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
-                    "gate_proj", "up_proj", "down_proj"],
-    lora_alpha=64,
-    lora_dropout=best_params["lora_dropout"],
-    bias="none",
-    use_gradient_checkpointing="unsloth",
-    random_state=3407,
-    max_seq_length=max_seq_length,
-    use_rslora=True,
-    loftq_config={},
-)
-
-# Final Training
-training_args = TrainingArguments(
-    output_dir=checkpoint_dir,
-    num_train_epochs=2,
-    per_device_train_batch_size=best_params["batch_size"],
-    gradient_accumulation_steps=4,
-    learning_rate=best_params["learning_rate"],
-    weight_decay=best_params["weight_decay"],
-    warmup_ratio=0.1,
-    lr_scheduler_type="cosine",
-    evaluation_strategy="steps",
-    eval_steps=5000,
-    logging_steps=10,
-    fp16=torch.cuda.is_available(),
-    seed=42,
-    report_to="wandb",
-    save_strategy="steps",
-    save_steps=5000,
-    save_total_limit=3,
-    load_best_model_at_end=True,
-    gradient_checkpointing=True,
-    push_to_hub=False,
-)
-
-#trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=tokenized_train,
-    eval_dataset=tokenized_eval,
-    data_collator=data_collator,
-    tokenizer=tokenizer,
-)
 
-#trainer.train(resume_from_checkpoint=latest_checkpoint)
 
 # %% Save the Optimized Model
 model.save_pretrained_merged("serenity-advanced", tokenizer, save_method="merged_16bit", push_to_hub=False)
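Note on the removed code: both `Trainer(...)` blocks above are commented out only at the constructor line, leaving their keyword arguments dangling, and `DataCollatorForLanguageModeling` is used without being imported. The sketch below is not part of this commit; it is one hedged way the removed objective's train/evaluate step could be wired up with the standard transformers API, assuming `model`, `tokenizer`, `tokenized_train`, `tokenized_eval`, `training_args`, and `latest_checkpoint` are built exactly as in the removed cells. The function name `run_training_step` is illustrative, not from the original file.

# Illustrative sketch only - not part of this commit or the original serenityai.py.
from transformers import DataCollatorForLanguageModeling, Trainer  # this import was missing in the removed code


def run_training_step(model, tokenizer, tokenized_train, tokenized_eval,
                      training_args, latest_checkpoint=None):
    # Causal-LM collator (mlm=False), as constructed in the removed Cell 2
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_eval,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # Resume from the latest checkpoint if one was found, otherwise start fresh
    trainer.train(resume_from_checkpoint=latest_checkpoint)

    # The removed Optuna objective minimises validation loss
    eval_results = trainer.evaluate()
    return eval_results["eval_loss"]

Inside the removed `hyperparameter_objective`, the value returned here would feed `study.optimize(hyperparameter_objective, n_trials=10)` unchanged.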
 
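The surviving `model.save_pretrained_merged("serenity-advanced", tokenizer, save_method="merged_16bit", ...)` call writes merged 16-bit weights to the `serenity-advanced` directory. As a hedged usage sketch (the directory name comes from that call; the prompt format mirrors the removed chat template, and the generation settings are placeholders), such a merged checkpoint can typically be reloaded with plain transformers:

# Hedged usage sketch - not part of the commit. Loads merged 16-bit weights
# written by save_pretrained_merged("serenity-advanced", ...) above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("serenity-advanced")
model = AutoModelForCausalLM.from_pretrained(
    "serenity-advanced",
    torch_dtype=torch.float16,  # matches the merged_16bit save method
    device_map="auto",
)

prompt = "User: I had a rough day at work.\nAI:"  # mirrors the removed "User:/AI:" template
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)  # generation settings are placeholders
print(tokenizer.decode(output[0], skip_special_tokens=True))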