Spaces:

ghost-logic
/

DnD_Campaign_Manager

Sleeping

DnD_Campaign_Manager / src /utils /file_parsers.py

official.ghost.logic

Deploy D&D Campaign Manager v2

71b378e 16 days ago

3.42 kB

	"""
	File Parsing Utilities for Session Notes Upload
	Supports: .txt, .md, .docx, .pdf
	"""
	from pathlib import Path
	from typing import Optional


	def parse_uploaded_file(file_path: str) -> str:
	    """
	    Parse an uploaded session notes file and return its text content.

	    The format is chosen from the file extension (case-insensitive):
	    - .txt / .md - read as text; decoded as UTF-8 (a leading byte-order
	      mark is dropped), falling back to Latin-1 when the bytes are not
	      valid UTF-8
	    - .docx - text of every paragraph joined with newlines
	      (requires python-docx)
	    - .pdf - extracted text of every page joined with newlines; pages
	      that yield no text are skipped (requires PyPDF2)

	    Args:
	        file_path: Path to uploaded file

	    Returns:
	        Extracted text content as string

	    Raises:
	        ValueError: If file type is not supported
	        ImportError: If the package needed for .docx/.pdf is not installed
	        Exception: If .docx or .pdf parsing fails (original error is
	            attached as the cause)
	    """
	    path = Path(file_path)
	    extension = path.suffix.lower()

	    # Plain text and markdown (simple read)
	    if extension in ('.txt', '.md'):
	        try:
	            # 'utf-8-sig' decodes ordinary UTF-8 as well, but also strips
	            # the BOM some editors prepend, so it does not end up as a
	            # stray '\ufeff' at the start of the notes.
	            return path.read_text(encoding='utf-8-sig')
	        except UnicodeDecodeError:
	            # Latin-1 maps every byte to a character, so this decode
	            # cannot fail; it is the last-resort fallback.
	            return path.read_text(encoding='latin-1')

	    # Microsoft Word documents
	    if extension == '.docx':
	        try:
	            import docx  # optional dependency, imported only when needed

	            document = docx.Document(file_path)
	            return '\n'.join(para.text for para in document.paragraphs)
	        except ImportError as exc:
	            raise ImportError(
	                "python-docx is required to parse .docx files. "
	                "Install with: pip install python-docx"
	            ) from exc
	        except Exception as exc:
	            # Same exception type and message as before; chained so the
	            # underlying failure stays visible in tracebacks.
	            raise Exception(f"Error parsing .docx file: {exc}") from exc

	    # PDF documents
	    if extension == '.pdf':
	        try:
	            import PyPDF2  # optional dependency, imported only when needed

	            with open(file_path, 'rb') as pdf_file:
	                reader = PyPDF2.PdfReader(pdf_file)
	                page_texts = (page.extract_text() for page in reader.pages)
	                # Join inside the `with` so pages are read while the file
	                # is still open; falsy results (None / "") are skipped.
	                return '\n'.join(text for text in page_texts if text)
	        except ImportError as exc:
	            raise ImportError(
	                "PyPDF2 is required to parse .pdf files. "
	                "Install with: pip install PyPDF2"
	            ) from exc
	        except Exception as exc:
	            raise Exception(f"Error parsing .pdf file: {exc}") from exc

	    raise ValueError(
	        f"Unsupported file type: {extension}. "
	        "Supported formats: .txt, .md, .docx, .pdf"
	    )


	def validate_file_size(file_path: str, max_size_mb: int = 10) -> bool:
	    """
	    Validate that file size is within acceptable limits.

	    Args:
	        file_path: Path to file
	        max_size_mb: Maximum file size in megabytes (default: 10 MB),
	            where one megabyte is 1024 * 1024 bytes

	    Returns:
	        True if the path is a regular file whose size does not exceed
	        the limit; False if it is too large, is missing, is not a
	        regular file (e.g. a directory), or cannot be inspected
	    """
	    path = Path(file_path)

	    # A directory "exists" too, but it is not an upload we can accept.
	    if not path.is_file():
	        return False

	    try:
	        size_bytes = path.stat().st_size
	    except OSError:
	        # The file vanished or became unreadable after the check above.
	        return False

	    # Compare in bytes; equivalent to size_bytes / (1024 * 1024) <= max_size_mb.
	    return size_bytes <= max_size_mb * 1024 * 1024


	def get_file_info(file_path: str) -> dict:
	    """
	    Get information about uploaded file.

	    Args:
	        file_path: Path to file

	    Returns:
	        Dictionary with file information:
	        - 'name': final path component, including the extension
	        - 'extension': lower-cased suffix including the dot ('' if none)
	        - 'size_bytes': file size in bytes
	        - 'size_mb': size in megabytes (1024 * 1024 bytes), rounded to
	          two decimal places

	    Raises:
	        OSError: If the file cannot be stat'ed (for example, it does
	            not exist)
	    """
	    path = Path(file_path)

	    # Stat once so both size fields describe the same snapshot of the file.
	    size_bytes = path.stat().st_size

	    return {
	        'name': path.name,
	        'extension': path.suffix.lower(),
	        'size_bytes': size_bytes,
	        'size_mb': round(size_bytes / (1024 * 1024), 2),
	    }