import os
from typing import List, Type, Union

import boto3
import pandas as pd

from tools.config import (
    AWS_REGION,
    DOCUMENT_REDACTION_BUCKET,
    RUN_AWS_FUNCTIONS,
    S3_OUTPUTS_BUCKET,
    SAVE_LOGS_TO_CSV,
)
from tools.secure_path_utils import secure_join

PandasDataFrame = Type[pd.DataFrame]

def get_assumed_role_info():
    """Return the ARN of the current STS caller identity and its final path segment."""
    sts_endpoint = "https://sts." + AWS_REGION + ".amazonaws.com"
    sts = boto3.client("sts", region_name=AWS_REGION, endpoint_url=sts_endpoint)
    response = sts.get_caller_identity()

    # Extract the ARN of the assumed role
    assumed_role_arn = response["Arn"]

    # Take the last path segment of the ARN. For an assumed-role ARN
    # (arn:aws:sts::<account>:assumed-role/<role>/<session>) this is the
    # session name; for an IAM user ARN it is the user name
    assumed_role_name = assumed_role_arn.split("/")[-1]

    return assumed_role_arn, assumed_role_name
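
# Example (a sketch; requires valid AWS credentials for AWS_REGION and only
# illustrates the return shape - the values shown are hypothetical):
#
#   arn, name = get_assumed_role_info()
#   print(arn)   # e.g. arn:aws:sts::123456789012:assumed-role/MyRole/my-session
#   print(name)  # e.g. my-session
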
if RUN_AWS_FUNCTIONS:
    try:
        # Verify that a boto3 session can be created with the configured region
        session = boto3.Session(region_name=AWS_REGION)
    except Exception as e:
        print("Could not start boto3 session:", e)

    try:
        assumed_role_arn, assumed_role_name = get_assumed_role_info()
        print("Successfully retrieved assumed role from STS")
        # print("Assumed Role ARN:", assumed_role_arn)
        # print("Assumed Role Name:", assumed_role_name)
    except Exception as e:
        print("Could not get assumed role from STS:", e)

# Download directly from S3 - requires login credentials
def download_file_from_s3(
    bucket_name: str,
    key: str,
    local_file_path_and_name: str,
    RUN_AWS_FUNCTIONS: bool = RUN_AWS_FUNCTIONS,
):
    """Download a single object from S3 to a local file path."""
    if RUN_AWS_FUNCTIONS:
        try:
            # Ensure the local directory exists (the dirname is empty when
            # downloading into the current working directory)
            local_dir = os.path.dirname(local_file_path_and_name)
            if local_dir:
                os.makedirs(local_dir, exist_ok=True)

            s3 = boto3.client("s3", region_name=AWS_REGION)
            s3.download_file(bucket_name, key, local_file_path_and_name)
            print(
                f"File downloaded from s3://{bucket_name}/{key} to {local_file_path_and_name}"
            )
        except Exception as e:
            print("Could not download file:", key, "from S3 due to:", e)
def download_folder_from_s3(
    bucket_name: str,
    s3_folder: str,
    local_folder: str,
    RUN_AWS_FUNCTIONS: bool = RUN_AWS_FUNCTIONS,
):
    """
    Download all files from an S3 folder to a local folder.
    """
    if RUN_AWS_FUNCTIONS:
        if bucket_name and s3_folder and local_folder:
            s3 = boto3.client("s3", region_name=AWS_REGION)

            # List objects in the specified S3 folder, paginating so that
            # folders with more than 1000 objects are fully downloaded
            paginator = s3.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_folder):
                # Download each object
                for obj in page.get("Contents", []):
                    # Extract the object key and construct the local file path
                    object_key = obj["Key"]

                    # Skip zero-byte "folder" placeholder objects
                    if object_key.endswith("/"):
                        continue

                    local_file_path = secure_join(
                        local_folder, os.path.relpath(object_key, s3_folder)
                    )

                    # Create directories if necessary
                    local_dir = os.path.dirname(local_file_path)
                    if local_dir:
                        os.makedirs(local_dir, exist_ok=True)

                    # Download the object
                    try:
                        s3.download_file(bucket_name, object_key, local_file_path)
                        print(
                            f"Downloaded 's3://{bucket_name}/{object_key}' to '{local_file_path}'"
                        )
                    except Exception as e:
                        print(
                            f"Error downloading 's3://{bucket_name}/{object_key}':", e
                        )
        else:
            print(
                "One or more required variables are empty, could not download from S3"
            )
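
# Example (illustrative; the bucket and prefix below are hypothetical):
#
#   download_folder_from_s3(
#       bucket_name="my-redaction-bucket",
#       s3_folder="inputs/2024-01-01",
#       local_folder="input",
#   )
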
def download_files_from_s3(
    bucket_name: str,
    s3_folder: str,
    local_folder: str,
    filenames: Union[List[str], str],
    RUN_AWS_FUNCTIONS: bool = RUN_AWS_FUNCTIONS,
):
    """
    Download specific files from an S3 folder to a local folder. Pass the
    string "*" as filenames to download every object under the prefix.
    """
    if RUN_AWS_FUNCTIONS:
        if bucket_name and s3_folder and local_folder and filenames:
            s3 = boto3.client("s3", region_name=AWS_REGION)

            print("Trying to download files:", filenames)

            if filenames == "*":
                # List all objects in the S3 folder
                print("Trying to download all files in AWS folder:", s3_folder)
                response = s3.list_objects_v2(Bucket=bucket_name, Prefix=s3_folder)

                print("Found files in AWS folder:", response.get("Contents", []))

                # Keep the file name component only, skipping "folder" placeholder keys
                filenames = [
                    obj["Key"].split("/")[-1]
                    for obj in response.get("Contents", [])
                    if not obj["Key"].endswith("/")
                ]

                print("Found filenames in AWS folder:", filenames)

            for filename in filenames:
                object_key = secure_join(s3_folder, filename)
                local_file_path = secure_join(local_folder, filename)

                # Create directories if necessary
                local_dir = os.path.dirname(local_file_path)
                if local_dir:
                    os.makedirs(local_dir, exist_ok=True)

                # Download the object
                try:
                    s3.download_file(bucket_name, object_key, local_file_path)
                    print(
                        f"Downloaded 's3://{bucket_name}/{object_key}' to '{local_file_path}'"
                    )
                except Exception as e:
                    print(f"Error downloading 's3://{bucket_name}/{object_key}':", e)
        else:
            print(
                "One or more required variables are empty, could not download from S3"
            )
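
# Example (illustrative; the names below are hypothetical). Pass "*" instead
# of a list to fetch every object under the prefix:
#
#   download_files_from_s3(
#       bucket_name="my-redaction-bucket",
#       s3_folder="inputs/2024-01-01",
#       local_folder="input",
#       filenames=["report.pdf", "allow_list.csv"],
#   )
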
def upload_file_to_s3(
    local_file_paths: List[str],
    s3_key: str,
    s3_bucket: str = DOCUMENT_REDACTION_BUCKET,
    RUN_AWS_FUNCTIONS: bool = RUN_AWS_FUNCTIONS,
):
    """
    Uploads one or more files from the local machine to Amazon S3.

    Args:
    - local_file_paths: Local file path(s) of the file(s) to upload.
    - s3_key: Key prefix (folder path) in the S3 bucket; each file name is
      appended to it, so it should normally end with "/".
    - s3_bucket: Name of the S3 bucket.

    Returns:
    - Message returned as a variable and printed to the console
    """
    final_out_message = list()
    final_out_message_str = ""

    if RUN_AWS_FUNCTIONS:
        try:
            if s3_bucket and s3_key and local_file_paths:
                s3_client = boto3.client("s3", region_name=AWS_REGION)

                if isinstance(local_file_paths, str):
                    local_file_paths = [local_file_paths]

                for file in local_file_paths:
                    if s3_client:
                        # print(s3_client)
                        try:
                            # Get the file name from the file path
                            file_name = os.path.basename(file)
                            s3_key_full = s3_key + file_name
                            # print("S3 key: ", s3_bucket, "/", s3_key_full, sep="")

                            s3_client.upload_file(file, s3_bucket, s3_key_full)
                            out_message = (
                                "File " + file_name + " uploaded successfully!"
                            )
                        except Exception as e:
                            out_message = f"Error uploading file(s): {e}"

                        print(out_message)
                        final_out_message.append(out_message)
                        final_out_message_str = "\n".join(final_out_message)
                    else:
                        final_out_message_str = "Could not connect to AWS."
            else:
                final_out_message_str = (
                    "At least one essential variable is empty, could not upload to S3"
                )
        except Exception as e:
            final_out_message_str = "Could not upload files to S3 due to: " + str(e)
            print(final_out_message_str)
    else:
        final_out_message_str = "App not set to run AWS functions"

    return final_out_message_str
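
# Example (illustrative; the local path and key prefix below are hypothetical.
# Note that s3_key is used as a prefix, so it should end with "/"):
#
#   message = upload_file_to_s3(
#       local_file_paths=["output/report_redacted.pdf"],
#       s3_key="outputs/2024-01-01/",
#   )
#   print(message)
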
def upload_log_file_to_s3(
    local_file_paths: List[str],
    s3_key: str,
    s3_bucket: str = DOCUMENT_REDACTION_BUCKET,
    RUN_AWS_FUNCTIONS: bool = RUN_AWS_FUNCTIONS,
    SAVE_LOGS_TO_CSV: bool = SAVE_LOGS_TO_CSV,
):
    """
    Uploads a log file from the local machine to Amazon S3. Uploads only when
    both RUN_AWS_FUNCTIONS and SAVE_LOGS_TO_CSV are enabled.

    Args:
    - local_file_paths: Local file path(s) of the file(s) to upload.
    - s3_key: Key prefix (folder path) in the S3 bucket; each file name is
      appended to it, so it should normally end with "/".
    - s3_bucket: Name of the S3 bucket.

    Returns:
    - Message returned as a variable and printed to the console
    """
    final_out_message = list()
    final_out_message_str = ""

    if RUN_AWS_FUNCTIONS and SAVE_LOGS_TO_CSV:
        try:
            if s3_bucket and s3_key and local_file_paths:
                s3_client = boto3.client("s3", region_name=AWS_REGION)

                if isinstance(local_file_paths, str):
                    local_file_paths = [local_file_paths]

                for file in local_file_paths:
                    if s3_client:
                        # print(s3_client)
                        try:
                            # Get the file name from the file path
                            file_name = os.path.basename(file)
                            s3_key_full = s3_key + file_name

                            s3_client.upload_file(file, s3_bucket, s3_key_full)
                            out_message = (
                                "File " + file_name + " uploaded successfully!"
                            )
                            # print(out_message)
                        except Exception as e:
                            out_message = f"Error uploading file(s): {e}"

                        print(out_message)
                        final_out_message.append(out_message)
                        final_out_message_str = "\n".join(final_out_message)
                    else:
                        final_out_message_str = "Could not connect to AWS."
            else:
                final_out_message_str = (
                    "At least one essential variable is empty, could not upload to S3"
                )
        except Exception as e:
            final_out_message_str = "Could not upload files to S3 due to: " + str(e)
            print(final_out_message_str)
    else:
        final_out_message_str = (
            "App not set to run AWS functions or to save logs to CSV"
        )

    return final_out_message_str
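
# Example (illustrative; the log path and key prefix below are hypothetical.
# This is a no-op unless RUN_AWS_FUNCTIONS and SAVE_LOGS_TO_CSV are both
# enabled in config):
#
#   upload_log_file_to_s3(
#       local_file_paths=["logs/usage_log.csv"],
#       s3_key="logs/",
#   )
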
# Helper to upload outputs to S3 when enabled in config.
def export_outputs_to_s3(
    file_list_state,
    s3_output_folder_state_value: str,
    save_outputs_to_s3_flag: bool,
    base_file_state=None,
    s3_bucket: str = S3_OUTPUTS_BUCKET,
):
    """
    Upload a list of local output files to the configured S3 outputs folder.

    - file_list_state: Gradio dropdown state that holds a list of file paths or a
      single path/string. If blank/empty, no action is taken.
    - s3_output_folder_state_value: Final S3 key prefix (including any session hash)
      to use as the destination folder for uploads.
    - save_outputs_to_s3_flag: Runtime toggle; if False, no action is taken.
    - base_file_state: Optional original input file(s) being analysed, used to
      derive a subfolder name for grouping outputs.
    - s3_bucket: Name of the S3 bucket.
    """
    try:
        # Respect the runtime toggle as well as the environment configuration
        if not save_outputs_to_s3_flag:
            return
        if not s3_output_folder_state_value:
            # No configured S3 outputs folder, so nothing to do
            return

        # Normalise the input to a Python list of strings
        file_paths = file_list_state
        if not file_paths:
            return

        # A Gradio dropdown may return a single string or a list
        if isinstance(file_paths, str):
            file_paths = [file_paths]

        # Filter out any non-truthy values
        file_paths = [p for p in file_paths if p]
        if not file_paths:
            return

        # Derive a base file stem (name without extension) from the original
        # file(s) being analysed, if provided. This is used to create an
        # additional subfolder layer so that outputs are grouped under the
        # analysed file name rather than under each output file name.
        base_stem = None
        if base_file_state:
            base_path = None
            # Gradio File components typically provide a list of objects with a `.name` attribute
            if isinstance(base_file_state, str):
                base_path = base_file_state
            elif isinstance(base_file_state, list) and base_file_state:
                first_item = base_file_state[0]
                base_path = getattr(first_item, "name", None) or str(first_item)
            else:
                base_path = getattr(base_file_state, "name", None) or str(
                    base_file_state
                )
            if base_path:
                base_name = os.path.basename(base_path)
                base_stem, _ = os.path.splitext(base_name)

        # Ensure the base S3 prefix (session/date) ends with a trailing slash
        base_prefix = s3_output_folder_state_value
        if not base_prefix.endswith("/"):
            base_prefix = base_prefix + "/"

        # For each file, append a subfolder. If we have a derived base_stem
        # from the input being analysed, use that; otherwise, fall back to
        # the individual output file name stem. Final pattern:
        #   <session_output_folder>/<date>/<base_file_stem>/<file_name>
        # or, if base_file_stem is not available:
        #   <session_output_folder>/<date>/<output_file_stem>/<file_name>
        for file in file_paths:
            file_name = os.path.basename(file)
            if base_stem:
                folder_stem = base_stem
            else:
                folder_stem, _ = os.path.splitext(file_name)
            per_file_prefix = base_prefix + folder_stem + "/"

            out_message = upload_file_to_s3(
                local_file_paths=[file],
                s3_key=per_file_prefix,
                s3_bucket=s3_bucket,
            )

            # Log any issues to the console so failures are visible in logs/stdout
            if (
                "Error uploading file" in out_message
                or "could not upload" in out_message.lower()
            ):
                print("export_outputs_to_s3 encountered issues:", out_message)

        print("Successfully uploaded outputs to S3")
    except Exception as e:
        # Do not break the app flow if the S3 upload fails; just report to the console
        print(f"export_outputs_to_s3 failed with error: {e}")

    # No GUI outputs to update
    return
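
# Example (illustrative; mirrors how a Gradio event handler might call this
# helper, with hypothetical state values):
#
#   export_outputs_to_s3(
#       file_list_state=["output/report_redacted.pdf", "output/report_log.csv"],
#       s3_output_folder_state_value="outputs/session123/2024-01-01/",
#       save_outputs_to_s3_flag=True,
#       base_file_state="input/report.pdf",
#   )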