Spaces:
Running
Running
| """Test the new filename formatting""" | |
| import os | |
| import sys | |
| import datetime | |
| import inspect | |
# Put the parent of this script's directory (taken to be the project root)
# at the front of the import search path, so the import below resolves
# against the top-level modules first.
_here = os.path.dirname(__file__)
_project_root = os.path.abspath(os.path.join(_here, '..'))
sys.path.insert(0, _project_root)

# Load the top-level utils.py under an explicit alias.
import utils as root_utils

# Report which file was actually imported, then dump the source of the
# formatter under test so the output shows the implementation being exercised.
print(f"Imported utils from: {root_utils.__file__}")
print("Current create_descriptive_filename implementation:")
print(inspect.getsource(root_utils.create_descriptive_filename))
def _legacy_filename(original_file, result):
    """Rebuild the old-style filename for *original_file*.

    The old scheme is ``<stem><doc type tag><period tag><extension>``:

    * doc type tag: ``result['detected_document_type']`` when that key is
      present, otherwise the first entry of ``result['topics']`` (if any);
    * period tag: the first topic containing one of the period markers.

    Both tags are lower-cased, have spaces replaced by underscores, and are
    prefixed with an underscore. A tag that does not apply is left empty.

    Args:
        original_file: File name including its extension.
        result: Mapping that may contain ``detected_document_type`` (str)
            and ``topics`` (list of str).

    Returns:
        The filename in the old format.
    """
    stem, file_ext = os.path.splitext(original_file)
    topics = result.get('topics') or []

    if 'detected_document_type' in result:
        doc_type = result['detected_document_type'].lower()
        doc_type_tag = f"_{doc_type.replace(' ', '_')}"
    elif topics:
        doc_type_tag = f"_{topics[0].lower().replace(' ', '_')}"
    else:
        doc_type_tag = ""

    # NOTE: these are plain substring tests, so a topic that merely contains
    # the letters (e.g. "era" inside a longer word) also counts as a period.
    period_markers = ("century", "pre-", "era")
    period_tag = ""
    for tag in topics:
        lowered = tag.lower()
        if any(marker in lowered for marker in period_markers):
            period_tag = f"_{lowered.replace(' ', '_')}"
            break

    return f"{stem}{doc_type_tag}{period_tag}{file_ext}"


def main():
    """Print old-format vs. new-format filenames for a fixed set of samples.

    For each sample file and its paired sample OCR result, the old-style name
    is rebuilt locally and the new-style name is obtained from
    ``root_utils.create_descriptive_filename(original_file, result, file_ext)``.
    Both are printed side by side; nothing is returned or asserted.
    """
    # Sample inputs
    sample_files = [
        "handwritten-letter.jpg",
        "magician-or-bottle-cungerer.jpg",
        "baldwin_15th_north.jpg",
        "harpers.pdf",
        "recipe.jpg",
    ]

    # Sample OCR results for testing, paired positionally with sample_files.
    # The second entry deliberately has no detected_document_type, which
    # exercises the fall-back to the first topic.
    sample_results = [
        {
            "detected_document_type": "handwritten",
            "topics": ["Letter", "Handwritten", "19th Century", "Personal Correspondence"],
        },
        {
            "topics": ["Newspaper", "Print", "19th Century", "Illustration", "Advertisement"],
        },
        {
            "detected_document_type": "letter",
            "topics": ["Correspondence", "Early Modern", "English Language"],
        },
        {
            "detected_document_type": "magazine",
            "topics": ["Publication", "Late 19th Century", "Magazine", "Historical"],
        },
        {
            "detected_document_type": "recipe",
            "topics": ["Food", "Culinary", "Historical", "Instruction"],
        },
    ]

    print("\nIMPROVED FILENAME FORMATTING TEST")
    print("=" * 50)

    # Printed for reference only; the date is not passed to the formatter.
    current_date = datetime.datetime.now().strftime("%b %d, %Y")
    print(f"Current date for filenames: {current_date}")

    print("\nBEFORE vs AFTER Examples:\n")
    pairs = zip(sample_files, sample_results)
    for number, (original_file, result) in enumerate(pairs, start=1):
        file_ext = os.path.splitext(original_file)[1]

        old_filename = _legacy_filename(original_file, result)
        new_filename = root_utils.create_descriptive_filename(original_file, result, file_ext)

        print(f"Example {number}:")
        print(f" Original: {original_file}")
        print(f" Old Format: {old_filename}")
        print(f" New Format: {new_filename}")
        print()
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()