Spaces:
Build error
Build error
| import re | |
| import pandas as pd | |
| from modules.scraper import get_raw_data, get_raw_data_sheets | |
| from modules.embedding_storage import process_safety_with_chroma | |
| from modules.qa_chatbot import create_chatbot, ask_question | |
def extract_column_name(query_template):
    """
    Extract the column name from the first ``{placeholder}`` in the template.

    Args:
        query_template: Query text containing a placeholder enclosed in
            curly braces, e.g. ``"Find the email of {company}"``.

    Returns:
        The text between the first pair of curly braces, returned verbatim
        (inner whitespace is not stripped, so ``"{" + name + "}"`` is still
        the exact placeholder text found in the template).

    Raises:
        ValueError: If the template contains no ``{...}`` placeholder, or if
            the first placeholder is empty or whitespace-only.
    """
    match = re.search(r"\{(.*?)\}", query_template)
    if not match:
        raise ValueError("No placeholder found in the query template. Ensure the query contains a placeholder like {column_name}.")

    column_name = match.group(1)
    # "{}" or "{ }" would otherwise be returned as a blank column name and
    # only fail later with a confusing "column '' is missing" error.
    if not column_name.strip():
        raise ValueError("Empty placeholder found in the query template. Ensure the query contains a placeholder like {column_name}.")
    return column_name
def process_query_and_update_csv(file_path, query_template):
    """
    Answer one query per CSV row and write the answers back to the file.

    The placeholder in ``query_template`` names a column of the CSV. For every
    row, the placeholder is replaced by that row's value in the column, the
    resulting query is run through ``get_raw_data`` ->
    ``process_safety_with_chroma`` -> ``create_chatbot`` -> ``ask_question``,
    and the response is stored in the row's ``Answer`` cell. The file at
    ``file_path`` is then overwritten (without the index) with the result.

    Args:
        file_path: Path of the CSV file to read and overwrite. It is also
            passed to ``get_raw_data`` together with each query.
        query_template: Query text containing a ``{column_name}`` placeholder.

    Returns:
        The DataFrame that was written back to ``file_path``, including the
        ``Answer`` column.

    Raises:
        ValueError: If the template has no placeholder or the referenced
            column does not exist in the CSV.
    """
    column_name = extract_column_name(query_template)
    df = pd.read_csv(file_path)

    if column_name not in df.columns:
        raise ValueError(f"The specified column '{column_name}' is missing in the provided CSV file.")

    if "Answer" not in df.columns:
        df["Answer"] = ""
    # An "Answer" column that already exists but is empty is parsed by
    # read_csv as float64 NaN; cast to object so answers of any type can be
    # assigned without an incompatible-dtype warning/error.
    df["Answer"] = df["Answer"].astype(object)

    placeholder = f"{{{column_name}}}"
    # Iterate the column itself rather than df.iterrows(): iterrows() builds a
    # Series per row and may upcast values (e.g. int 5 -> 5.0), which would
    # leak into the generated query text.
    for index, value in df[column_name].items():
        # A missing cell would be rendered as the literal "nan" and produce a
        # meaningless query; leave that row's answer untouched instead.
        if pd.isna(value):
            continue

        query = query_template.replace(placeholder, str(value))

        # Process the query using the provided project helpers.
        raw_data = get_raw_data(file_path, query)
        vector_store = process_safety_with_chroma(raw_data)
        qa_system = create_chatbot(vector_store)
        prompt = f"Give me the exact answer for this below query '{query}' in a structured format with a link from the content provided only."
        answer = ask_question(qa_system, prompt)
        df.at[index, "Answer"] = answer

    df.to_csv(file_path, index=False)
    return df
def process_query_and_update_sheets(file_path, df, query_template):
    """
    Answer one query per DataFrame row and store the answers in ``Answer``.

    The placeholder in ``query_template`` names a column of ``df``. For every
    row, the placeholder is replaced by that row's value in the column, the
    resulting query is run through ``get_raw_data_sheets`` ->
    ``process_safety_with_chroma`` -> ``create_chatbot`` -> ``ask_question``,
    and the response is stored in the row's ``Answer`` cell. Nothing is
    written to disk; ``df`` is modified in place and also returned.

    Args:
        file_path: Unused by this function; kept so the signature stays
            compatible with existing callers.
        df: DataFrame holding the sheet data. An ``Answer`` column is added
            if it is not already present.
        query_template: Query text containing a ``{column_name}`` placeholder.

    Returns:
        The same DataFrame, with the ``Answer`` column filled in.

    Raises:
        ValueError: If the template has no placeholder or the referenced
            column does not exist in ``df``.
    """
    column_name = extract_column_name(query_template)

    if column_name not in df.columns:
        raise ValueError(f"The specified column '{column_name}' is missing in the provided CSV file.")

    if "Answer" not in df.columns:
        df["Answer"] = ""
    # Use object dtype so answers of any type can be assigned even when a
    # pre-existing "Answer" column was inferred as numeric/NaN.
    df["Answer"] = df["Answer"].astype(object)

    placeholder = f"{{{column_name}}}"
    # Iterate the column itself rather than df.iterrows(): iterrows() builds a
    # Series per row and may upcast values (e.g. int 5 -> 5.0), which would
    # leak into the generated query text.
    for index, value in df[column_name].items():
        # A missing cell would be rendered as "nan"/"None" and produce a
        # meaningless query; leave that row's answer untouched instead.
        # The is_scalar guard keeps pd.isna from returning an array for
        # list-like cells in a caller-supplied frame.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            continue

        query = query_template.replace(placeholder, str(value))

        # Process the query using the provided project helpers.
        raw_data = get_raw_data_sheets(query)
        vector_store = process_safety_with_chroma(raw_data)
        qa_system = create_chatbot(vector_store)
        prompt = f"Give me the exact answer for this below query '{query}' in a structured format with a link from the content provided only."
        answer = ask_question(qa_system, prompt)
        df.at[index, "Answer"] = answer

    return df