from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
import pandas as pd
from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
import pickle
from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
def get_eval_data(eval_pdf_paths: list,
                  regenerate=False,
                  path_to_save='eval_dataset'):
    if regenerate:
        print("regenerating")

        # Chunk every PDF and flatten the per-document chunk lists into one list
        all_chunks = []
        for path in eval_pdf_paths:
            chunks = process_pdf(path)
            all_chunks.append(chunks)
        chunks_flattened = [chunk for chunks in all_chunks for chunk in chunks]

        # Generate question/answer pairs from the chunks and persist them to disk
        qa_generator = LLMBasedQAGenerator()
        dataset = qa_generator.generate_dataset(chunks_flattened,
                                                persist_dataset=True,
                                                presisted_file_path=path_to_save)
        return dataset
    else:
        if path_to_save:
            # Reload a previously generated dataset instead of regenerating it
            with open(path_to_save, 'rb') as f:
                dataset = pickle.load(f)
            return dataset
        else:
            raise ValueError("Please specify the path where the dataset was previously saved "
                             "in the parameter 'path_to_save'")
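# Usage sketch (not in the original script): after one regeneration pass, the pickled
# dataset can be reloaded without re-running the QA generator, e.g.
#   dataset = get_eval_data(regenerate=False, path_to_save='eval_dataset', eval_pdf_paths=[])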
def evaluate(dataset):
    groundedness_checker = GroundednessChecker()

    eval_df = pd.DataFrame(data=dataset)

    # Run the groundedness checker on every question/answer/context triple
    predictions = []
    confidence_scores = []
    for _, row in eval_df.iterrows():
        groundedness_result = groundedness_checker.check(
            question=row['question'],
            answer=row['answer'],
            context=row['context'])

        predictions.append(groundedness_result['is_grounded'])
        confidence_scores.append(groundedness_result['confidence'])

    eval_df['predicted'] = predictions
    eval_df['confidence'] = confidence_scores

    # Compare predictions against the gold labels
    accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
    precision = precision_score(eval_df['label'], eval_df['predicted'])
    recall = recall_score(eval_df['label'], eval_df['predicted'])
    f1 = f1_score(eval_df['label'], eval_df['predicted'])
    conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:\n", conf_matrix)
# Usage
if __name__ == "__main__":
    dataset = get_eval_data(eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"])
    evaluate(dataset)