Spaces:

Corin1998
/

HFResumeIntakeSystem_DC

Sleeping

Create utils.py

1a1b2af verified 20 days ago

794 Bytes

	import io
	import docx

	def detect_filetype(filename: str, file_bytes: bytes) -> str:
	    """Classify an upload as "pdf", "image", "docx", "txt" or "unknown".

	    The file name extension is checked first (case-insensitively). If the
	    extension is not recognised, the leading bytes of the content are
	    checked for the PDF signature ``%PDF``.

	    Args:
	        filename: Original file name; ``None`` or "" is treated as having
	            no extension.
	        file_bytes: Raw file content; ``None`` or empty content simply
	            skips the signature check instead of raising.

	    Returns:
	        One of "pdf", "image", "docx", "txt" or "unknown".
	    """
	    image_exts = (".png", ".jpg", ".jpeg", ".tiff", ".bmp")
	    fname = (filename or "").lower()
	    if fname.endswith(".pdf"):
	        return "pdf"
	    # str.endswith accepts a tuple, so one call covers every image suffix.
	    if fname.endswith(image_exts):
	        return "image"
	    if fname.endswith(".docx"):
	        return "docx"
	    if fname.endswith(".txt"):
	        return "txt"
	    # Fall back to the PDF signature; the truthiness guard keeps a missing
	    # (None) payload from raising TypeError on the slice.
	    if file_bytes and file_bytes[:4] == b"%PDF":
	        return "pdf"
	    return "unknown"

	def load_doc_text(filetype: str, file_bytes: bytes) -> str:
	    """Extract plain text from a docx or txt payload.

	    Args:
	        filetype: Type label for the payload; only "docx" and "txt" are
	            handled here.
	        file_bytes: Raw file content.

	    Returns:
	        For "docx", the text of every paragraph in ``doc.paragraphs``
	        joined with newlines. For "txt", the content decoded as UTF-8 with
	        undecodable bytes dropped and a leading byte-order mark removed.
	        For any other ``filetype``, an empty string.
	    """
	    if filetype == "docx":
	        doc = docx.Document(io.BytesIO(file_bytes))
	        return "\n".join(p.text for p in doc.paragraphs)
	    if filetype == "txt":
	        # "utf-8-sig" strips a leading BOM, which plain "utf-8" would leave
	        # in the text as a stray "\ufeff"; other input decodes the same.
	        return file_bytes.decode("utf-8-sig", errors="ignore")
	    return ""