Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

App Files Files Community

LifeAdmin-AI / tools /ocr_server.py

Maheen001

Create tools/ocr_server.py

3fe84f5 verified 13 days ago

raw

history blame

2.29 kB

	import asyncio
	import io
	import tempfile
	import threading
	from pathlib import Path

	import easyocr
	import pytesseract
	from PIL import Image


	# EasyOCR language codes mapped to the Tesseract traineddata names for the
	# same language. Codes not listed here are handed to Tesseract unchanged,
	# so callers that already pass a Tesseract code keep working.
	_TESSERACT_LANG_CODES = {
	    'en': 'eng',
	    'fr': 'fra',
	    'de': 'deu',
	    'es': 'spa',
	    'it': 'ita',
	    'pt': 'por',
	    'nl': 'nld',
	    'ru': 'rus',
	    'uk': 'ukr',
	    'pl': 'pol',
	    'cs': 'ces',
	    'sv': 'swe',
	    'da': 'dan',
	    'no': 'nor',
	    'tr': 'tur',
	    'ar': 'ara',
	    'fa': 'fas',
	    'ur': 'urd',
	    'hi': 'hin',
	    'bn': 'ben',
	    'ta': 'tam',
	    'te': 'tel',
	    'th': 'tha',
	    'vi': 'vie',
	    'id': 'ind',
	    'ja': 'jpn',
	    'ko': 'kor',
	    'ch_sim': 'chi_sim',
	    'ch_tra': 'chi_tra',
	}

	# EasyOCR readers cached per language so a Reader is constructed once per
	# language instead of once per call.
	_EASYOCR_READERS = {}

	# Serialises reader creation and inference: OCR runs on executor threads and
	# the cached readers are shared between calls.
	_EASYOCR_LOCK = threading.Lock()


	def _easyocr_extract(file_path: str, language: str) -> dict:
	    """Run EasyOCR on *file_path* and build the result dict (blocking)."""
	    with _EASYOCR_LOCK:
	        reader = _EASYOCR_READERS.get(language)
	        if reader is None:
	            reader = easyocr.Reader([language], gpu=False)
	            _EASYOCR_READERS[language] = reader
	        detections = reader.readtext(file_path)

	    # Each detection carries the recognised text at index 1 and its
	    # confidence at index 2.
	    text_lines = [detection[1] for detection in detections]
	    confidences = [detection[2] for detection in detections]
	    avg_confidence = sum(confidences) / len(confidences) if confidences else 0

	    return {
	        'text': '\n'.join(text_lines),
	        'confidence': float(avg_confidence),
	        'line_count': len(text_lines),
	        'method': 'easyocr',
	    }


	def _tesseract_extract(file_path: str, language: str) -> dict:
	    """Run Tesseract on *file_path* and build the result dict (blocking)."""
	    tesseract_lang = _TESSERACT_LANG_CODES.get(language, language)

	    # The context manager closes the underlying file handle once OCR is done.
	    with Image.open(file_path) as img:
	        text = pytesseract.image_to_string(img, lang=tesseract_lang)

	    return {
	        'text': text,
	        'confidence': 0.8,  # Estimated: no per-word score is requested here
	        # Count only lines that contain text, not blank or trailing ones.
	        'line_count': sum(1 for line in text.splitlines() if line.strip()),
	        'method': 'tesseract',
	    }


	async def extract_text_ocr(file_path: str, language: str = 'en') -> dict:
	    """
	    Extract text from an image using OCR.

	    EasyOCR is tried first; if it raises, Tesseract is used as a fallback.
	    Both engines are blocking calls, so they are run on the event loop's
	    default executor instead of directly inside the coroutine.

	    Args:
	        file_path: Path to image file
	        language: EasyOCR-style language code (e.g. 'en'). For the fallback
	            it is translated to the matching Tesseract code when known.

	    Returns:
	        Dict with 'text', 'confidence', 'line_count' and 'method'. If both
	        engines fail, 'text' is empty, 'confidence' and 'line_count' are 0,
	        'method' is None and 'error' holds both failure messages.
	    """
	    loop = asyncio.get_running_loop()

	    try:
	        # Try EasyOCR first (better quality)
	        return await loop.run_in_executor(
	            None, _easyocr_extract, file_path, language
	        )
	    except Exception as exc:
	        # Keep only the message; the fallback below reports it if needed.
	        easyocr_error = str(exc)

	    try:
	        # Fallback to Tesseract
	        return await loop.run_in_executor(
	            None, _tesseract_extract, file_path, language
	        )
	    except Exception as exc:
	        return {
	            'text': '',
	            'error': f"OCR failed: {easyocr_error}, {exc}",
	            'confidence': 0,
	            'line_count': 0,
	            'method': None,
	        }


	async def extract_text_from_pdf_image(pdf_path: str, page_num: int = 0, language: str = 'en') -> dict:
	    """
	    Extract text from one page of a scanned PDF using OCR.

	    The page is rendered to a PNG inside a private temporary directory,
	    passed to extract_text_ocr, and the directory is removed afterwards.

	    Args:
	        pdf_path: Path to the PDF file
	        page_num: Zero-based index of the page to read
	        language: Language code forwarded to extract_text_ocr

	    Returns:
	        The dict produced by extract_text_ocr on success. On failure, a
	        dict with empty 'text', an 'error' message and 'confidence' 0.
	    """
	    if page_num < 0:
	        return {
	            'text': '',
	            'error': f"Invalid page number: {page_num}",
	            'confidence': 0,
	        }

	    try:
	        from pdf2image import convert_from_path

	        loop = asyncio.get_running_loop()

	        # Rendering is a blocking call, so it runs on the default executor.
	        # pdf2image numbers pages from 1, hence the +1.
	        images = await loop.run_in_executor(
	            None,
	            lambda: convert_from_path(
	                pdf_path, first_page=page_num + 1, last_page=page_num + 1
	            ),
	        )

	        if not images:
	            return {'text': '', 'error': 'No pages found', 'confidence': 0}

	        # A per-call temporary directory avoids clashes between concurrent
	        # calls for the same page number and guarantees cleanup.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            temp_path = str(Path(tmp_dir) / f"page_{page_num}.png")
	            images[0].save(temp_path, 'PNG')

	            # Extract text
	            return await extract_text_ocr(temp_path, language)

	    except Exception as e:
	        return {'text': '', 'error': str(e), 'confidence': 0}