Spaces:

m7mdal7aj
/

KB-VQA

Sleeping

App Files Files Community

m7mdal7aj committed on May 17, 2024

Commit

9485a43

verified ·

1 Parent(s): 6740cd3

Update my_model/fine_tuner/fine_tuner.py

Browse files

Files changed (1) hide show

my_model/fine_tuner/fine_tuner.py +49 -16

my_model/fine_tuner/fine_tuner.py CHANGED Viewed

@@ -96,13 +96,12 @@ class Finetuner:
            - print_trainable_parameters: Prints the number of trainable parameters in the model.
        """
     def __init__(self, train_dataset: Dataset, eval_dataset: Dataset) -> None:
         """
         Initializes the Finetuner class with the model, tokenizer, and datasets.
         Args:
-            model (AutoModelForCausalLM): The pre-trained language model.
-            tokenizer (AutoTokenizer): The tokenizer for the model.
             train_dataset (Dataset): The dataset for training the model.
             eval_dataset (Dataset): The dataset for evaluating the model.
         """
@@ -111,7 +110,7 @@ class Finetuner:
         self.merged_model = None
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
-        # please refer to config file 'fine_tuning_config.py' for training arguments description.
         self.training_arguments = TrainingArguments(
             output_dir=config.OUTPUT_DIR,
             num_train_epochs=config.NUM_TRAIN_EPOCHS,
@@ -135,10 +134,9 @@ class Finetuner:
             report_to="tensorboard"
         )
-    def load_LLAMA2_for_finetuning(self):
         """
         Loads the LLAMA2 model and tokenizer, specifically configured for fine-tuning.
-        This method ensures the model is ready to be adapted to a specific task or dataset.
         Returns:
             Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
@@ -148,6 +146,7 @@ class Finetuner:
         base_model, tokenizer = llm_manager.load_model_and_tokenizer(for_fine_tuning=True)
         return base_model, tokenizer
     def find_all_linear_names(self) -> List[str]:
         """
@@ -156,6 +155,7 @@ class Finetuner:
         Returns:
             List[str]: A list of linear layer names.
         """
         cls = bitsandbytes.nn.Linear4bit
         lora_module_names = set()
         for name, module in self.base_model.named_modules():
@@ -167,12 +167,16 @@ class Finetuner:
         lora_module_names -= {'lm_head', 'gate_proj'}
         return list(lora_module_names)
     def print_trainable_parameters(self, use_4bit: bool = False) -> None:
         """
         Calculates and prints the number of trainable parameters in the model.
         Args:
             use_4bit (bool): If true, calculates the parameter count considering 4-bit quantization.
         """
         trainable_params = sum(p.numel() for p in self.base_model.parameters() if p.requires_grad)
         if use_4bit:
@@ -188,6 +192,9 @@ class Finetuner:
         Args:
             peft_config (LoraConfig): Configuration for the PEFT training process.
         """
         self.base_model.config.use_cache = False
         # Set the pretraining_tp flag to 1 to enable the use of LoRA (Low-Rank Adaptation) layers.
@@ -207,8 +214,7 @@ class Finetuner:
         )
         self.trainer.train()
-    def save_model(self):
         """
         Saves the fine-tuned model to the specified directory.
@@ -218,12 +224,15 @@ class Finetuner:
         for later use or evaluation.
         The saved model can be easily loaded using Hugging Face's model loading utilities.
         """
         self.fine_tuned_adapter_name = config.ADAPTER_SAVE_NAME
         self.trainer.model.save_pretrained(self.fine_tuned_adapter_name)
-    def merge_weights(self):
         """
         Merges the weights of the fine-tuned adapter with the base model.
@@ -234,18 +243,26 @@ class Finetuner:
         After merging, the weights of the adapter are no longer separate from the
         base model, enabling more efficient storage and deployment. The merged model
         is stored in the 'self.merged_model' attribute of the Finetuner class.
         """
         self.merged_model = PeftModel.from_pretrained(self.base_model, self.fine_tuned_adapter_name)
         self.merged_model = self.merged_model.merge_and_unload()
-    def delete_model(self, model_name: str):
         """
         Deletes a specified model attribute.
         Args:
             model_name (str): The name of the model attribute to delete.
         """
         try:
             if hasattr(self, model_name) and getattr(self, model_name) is not None:
                 delattr(self, model_name)
@@ -254,14 +271,19 @@ class Finetuner:
                 print(f"Warning: Model '{model_name}' has already been cleared or does not exist.")
         except Exception as e:
             print(f"Error occurred while deleting model '{model_name}': {str(e)}")
-    def delete_trainer(self, trainer_name: str):
         """
         Deletes a specified trainer object.
         Args:
             trainer_name (str): The name of the trainer object to delete.
         """
         try:
             if hasattr(self, trainer_name) and getattr(self, trainer_name) is not None:
                 delattr(self, trainer_name)
@@ -271,10 +293,15 @@ class Finetuner:
         except Exception as e:
             print(f"Error occurred while deleting trainer object '{trainer_name}': {str(e)}")
-    def clear_training_resources(self):
         """
         Clears GPU memory.
         """
         try:
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
@@ -282,10 +309,15 @@ class Finetuner:
         except Exception as e:
             print(f"Error occurred while clearing GPU memory: {str(e)}")
-    def clear_cache_and_collect_garbage(self):
         """
         Clears Hugging Face's Transformers cache and runs garbage collection.
         """
         try:
             if os.path.exists(TRANSFORMERS_CACHE):
                 shutil.rmtree(TRANSFORMERS_CACHE, ignore_errors=True)
@@ -296,7 +328,9 @@ class Finetuner:
         except Exception as e:
             print(f"Error occurred while clearing cache and collecting garbage: {str(e)}")
-def fine_tune(save_fine_tuned_adapter=False, merge=False, delete_trainer_after_fine_tune=False):
     """
     Conducts the fine-tuning process of a pre-trained language model using specified configurations.
     This function encompasses the complete workflow of fine-tuning, including data handling, training,
@@ -313,9 +347,8 @@ def fine_tune(save_fine_tuned_adapter=False, merge=False, delete_trainer_after_f
         delete_trainer_after_fine_tune (bool): If True, deletes the trainer object after fine-tuning to free up resources.
     Returns:
-        The fine-tuned model after the fine-tuning process. This could be either the merged model
-        or the trained model based on the provided arguments.
     """
     data_handler = FinetuningDataHandler()

            - print_trainable_parameters: Prints the number of trainable parameters in the model.
        """
     def __init__(self, train_dataset: Dataset, eval_dataset: Dataset) -> None:
         """
         Initializes the Finetuner class with the model, tokenizer, and datasets.
         Args:
             train_dataset (Dataset): The dataset for training the model.
             eval_dataset (Dataset): The dataset for evaluating the model.
         """
         self.merged_model = None
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
+        # please refer to config file 'my_model/config/fine_tuning_config.py' for training arguments description.
         self.training_arguments = TrainingArguments(
             output_dir=config.OUTPUT_DIR,
             num_train_epochs=config.NUM_TRAIN_EPOCHS,
             report_to="tensorboard"
         )
+    def load_LLAMA2_for_finetuning(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
         """
         Loads the LLAMA2 model and tokenizer, specifically configured for fine-tuning.
         Returns:
             Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
         base_model, tokenizer = llm_manager.load_model_and_tokenizer(for_fine_tuning=True)
         return base_model, tokenizer
     def find_all_linear_names(self) -> List[str]:
         """
         Returns:
             List[str]: A list of linear layer names.
         """
         cls = bitsandbytes.nn.Linear4bit
         lora_module_names = set()
         for name, module in self.base_model.named_modules():
         lora_module_names -= {'lm_head', 'gate_proj'}
         return list(lora_module_names)
     def print_trainable_parameters(self, use_4bit: bool = False) -> None:
         """
         Calculates and prints the number of trainable parameters in the model.
         Args:
             use_4bit (bool): If true, calculates the parameter count considering 4-bit quantization.
+        Returns:
+            None
         """
         trainable_params = sum(p.numel() for p in self.base_model.parameters() if p.requires_grad)
         if use_4bit:
         Args:
             peft_config (LoraConfig): Configuration for the PEFT training process.
+        Returns:
+            None
         """
         self.base_model.config.use_cache = False
         # Set the pretraining_tp flag to 1 to enable the use of LoRA (Low-Rank Adaptation) layers.
         )
         self.trainer.train()
+    def save_model(self) -> None:
         """
         Saves the fine-tuned model to the specified directory.
         for later use or evaluation.
         The saved model can be easily loaded using Hugging Face's model loading utilities.
+        Returns:
+            None
         """
         self.fine_tuned_adapter_name = config.ADAPTER_SAVE_NAME
         self.trainer.model.save_pretrained(self.fine_tuned_adapter_name)
+    def merge_weights(self) -> None:
         """
         Merges the weights of the fine-tuned adapter with the base model.
         After merging, the weights of the adapter are no longer separate from the
         base model, enabling more efficient storage and deployment. The merged model
         is stored in the 'self.merged_model' attribute of the Finetuner class.
+        Returns:
+            None
         """
         self.merged_model = PeftModel.from_pretrained(self.base_model, self.fine_tuned_adapter_name)
         self.merged_model = self.merged_model.merge_and_unload()
+    def delete_model(self, model_name: str) -> None:
         """
         Deletes a specified model attribute.
         Args:
             model_name (str): The name of the model attribute to delete.
+        Returns:
+            None
         """
         try:
             if hasattr(self, model_name) and getattr(self, model_name) is not None:
                 delattr(self, model_name)
                 print(f"Warning: Model '{model_name}' has already been cleared or does not exist.")
         except Exception as e:
             print(f"Error occurred while deleting model '{model_name}': {str(e)}")
+    def delete_trainer(self, trainer_name: str) -> None:
         """
         Deletes a specified trainer object.
         Args:
             trainer_name (str): The name of the trainer object to delete.
+        Returns:
+            None
         """
         try:
             if hasattr(self, trainer_name) and getattr(self, trainer_name) is not None:
                 delattr(self, trainer_name)
         except Exception as e:
             print(f"Error occurred while deleting trainer object '{trainer_name}': {str(e)}")
+    def clear_training_resources(self) -> None:
         """
         Clears GPU memory.
+        Returns:
+            None
         """
         try:
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
         except Exception as e:
             print(f"Error occurred while clearing GPU memory: {str(e)}")
+    def clear_cache_and_collect_garbage(self) -> None:
         """
         Clears Hugging Face's Transformers cache and runs garbage collection.
+        Returns:
+            None
         """
         try:
             if os.path.exists(TRANSFORMERS_CACHE):
                 shutil.rmtree(TRANSFORMERS_CACHE, ignore_errors=True)
         except Exception as e:
             print(f"Error occurred while clearing cache and collecting garbage: {str(e)}")
+def fine_tune(save_fine_tuned_adapter: bool = False, merge: bool = False, delete_trainer_after_fine_tune: bool = False) -> AutoModelForCausalLM:
     """
     Conducts the fine-tuning process of a pre-trained language model using specified configurations.
     This function encompasses the complete workflow of fine-tuning, including data handling, training,
         delete_trainer_after_fine_tune (bool): If True, deletes the trainer object after fine-tuning to free up resources.
     Returns:
+        AutoModelForCausalLM: The fine-tuned model after the fine-tuning process. This could be either the merged model
+                              or the trained model based on the provided arguments.
     """
     data_handler = FinetuningDataHandler()