Spaces:

AIAcceleratorLab
/

ocr

Sleeping

App Files Files Community

msmhmorsi committed on Jan 4

Commit

d24e4b0

1 Parent(s): 57f593d

added all

Browse files

Files changed (3) hide show

Dockerfile +16 -5
app.py +155 -5
requirements.txt +8 -2

Dockerfile CHANGED Viewed

@@ -1,16 +1,27 @@
-# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
-# you will also find guides on how best to write your Dockerfile
 FROM python:3.9
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
 WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
-RUN pip install --no-cache-dir --upgrade -r requirements.txt
-COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+# Use the official Python image from the Docker Hub
 FROM python:3.9
 RUN useradd -m -u 1000 user
+# Install system dependencies for OpenCV.
+# This must happen while the build is still running as root: apt-get cannot
+# write to /var/lib/apt or install packages once USER switches to "user".
+RUN apt-get update && apt-get install -y \
+    libgl1-mesa-glx \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
+# Set the working directory in the container
 WORKDIR /app
+# Copy the requirements file into the container
 COPY --chown=user ./requirements.txt requirements.txt
+# Install the dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code, owned by the runtime user so the
+# application can read (and, if needed, write) its own files
+COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -1,7 +1,157 @@
-from fastapi import FastAPI
-app = FastAPI()
-@app.get("/")
-def greet_json():
-    return {"Hello": "World!"}

+import cv2
+import fitz
+import numpy as np
+from io import BytesIO
+import matplotlib.pyplot as plt
+from skimage.color import rgb2gray
+from skimage.measure import label, regionprops
+from fastapi.responses import StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import FastAPI, UploadFile, File, HTTPException
+# ASGI application object; the Dockerfile CMD and the __main__ guard both serve it as "app".
+app = FastAPI(
+    version="1.0.0",
+    title="PDF Processing API",
+    description="API for converting PDF to PNG and enhancing images",
+)
+# Register CORS handling with every origin, method and header allowed.
+# NOTE(review): wildcard origins combined with allow_credentials=True is very
+# permissive - confirm that credentialed cross-origin requests are really needed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_credentials=True,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def convert_and_process_pdf(pdf_content: bytes, area_threshold: int = 100) -> BytesIO:
+    """
+    Render the first page of a PDF to an image and remove small specks from it.
+    The page is rasterised at 300 DPI, binarised with Otsu's threshold, and every
+    connected component of below-threshold (dark) pixels that covers fewer than
+    ``area_threshold`` pixels is dropped.
+    Args:
+        pdf_content: The PDF file content as bytes.
+        area_threshold: Minimum pixel count a connected component needs to be kept (default: 100).
+    Returns:
+        BytesIO: PNG-encoded cleaned image, rewound to offset 0.
+    Raises:
+        ValueError: If the PDF has no pages or the rendered page cannot be decoded.
+    """
+    # The context manager closes the document even when rendering fails.
+    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
+        if doc.page_count == 0:
+            raise ValueError("PDF has no pages")
+        # Encode to PNG bytes while the document is still open.
+        png_image = doc.load_page(0).get_pixmap(dpi=300).tobytes("png")
+    # Load the image with OpenCV
+    img = cv2.imdecode(np.frombuffer(png_image, dtype=np.uint8), cv2.IMREAD_COLOR)
+    if img is None:
+        raise ValueError("Rendered page could not be decoded")
+    # cv2 decodes to BGR channel order; reverse the channel axis so that
+    # rgb2gray applies its red/green/blue weights to the right channels.
+    img_gray = rgb2gray(img[..., ::-1])
+    # Convert grayscale to binary using Otsu's threshold
+    _, img_binary = cv2.threshold((img_gray * 255).astype(np.uint8), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    # Invert so that dark pixels become the non-zero foreground that label() tracks
+    img_binary = ~img_binary
+    # Label connected components (label 0 is the background)
+    label_img = label(img_binary)
+    # areas[i] is the pixel count of label i; indexing the boolean table with
+    # the label image keeps exactly the components that are large enough.
+    areas = np.bincount(label_img.ravel())
+    keep = areas >= area_threshold
+    keep[0] = False  # never keep the background, whatever its size
+    img_filtered = keep[label_img]
+    # Save enhanced image to memory (inverted back: kept components dark on white)
+    output_buffer = BytesIO()
+    plt.imsave(output_buffer, ~img_filtered, cmap="gray", format="png")
+    output_buffer.seek(0)
+    return output_buffer
+@app.post("/process-pdf/")
+async def process_pdf(
+    file: UploadFile = File(...),
+    area_threshold: int = 100
+):
+    """
+    Process a PDF file and return an enhanced PNG image.
+    Args:
+        file: The PDF file to process
+        area_threshold: Threshold for area filtering (default: 100)
+    Returns:
+        StreamingResponse: Enhanced PNG image, sent as an attachment named
+        "<original stem>_enhanced.png"
+    Raises:
+        HTTPException: 400 if the upload is empty, 500 if processing fails.
+    """
+    from urllib.parse import quote
+    # Read PDF file content
+    pdf_content = await file.read()
+    if not pdf_content:
+        raise HTTPException(status_code=400, detail="Uploaded file is empty")
+    try:
+        # Process the PDF and get the enhanced image
+        enhanced_image = convert_and_process_pdf(pdf_content, area_threshold)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e
+    # The client may omit the filename entirely; fall back to a fixed stem.
+    stem = (file.filename or "output").rsplit(".", 1)[0]
+    download_name = f"{stem}_enhanced.png"
+    # Header values must be Latin-1 encodable and must not contain raw quotes,
+    # so send a sanitised ASCII fallback plus the exact name percent-encoded
+    # in the filename* parameter.
+    ascii_name = "".join(
+        ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
+        for ch in download_name
+    )
+    encoded_name = quote(download_name, safe="")
+    disposition = f"attachment; filename=\"{ascii_name}\"; filename*=UTF-8''{encoded_name}"
+    # Return the processed image as a StreamingResponse
+    return StreamingResponse(
+        enhanced_image,
+        media_type="image/png",
+        headers={"Content-Disposition": disposition}
+    )
+@app.post("/process-image/")
+async def process_image(
+    file: UploadFile = File(...),
+    area_threshold: int = 100
+):
+    """
+    Process an image file and return an enhanced image.
+    The image is binarised with Otsu's threshold and every connected component
+    of below-threshold (dark) pixels smaller than ``area_threshold`` pixels is
+    removed.
+    Args:
+        file: The image file to process
+        area_threshold: Threshold for area filtering (default: 100)
+    Returns:
+        StreamingResponse: Enhanced PNG image, sent as an attachment named
+        "<original stem>_enhanced.png"
+    Raises:
+        HTTPException: 400 if the upload is empty or is not a decodable image,
+        500 if processing fails.
+    """
+    from urllib.parse import quote
+    # Read image file content
+    image_content = await file.read()
+    # imdecode raises on an empty buffer and returns None for data it cannot
+    # decode; report both as a client error with a clear message.
+    img = None
+    if image_content:
+        img = cv2.imdecode(np.frombuffer(image_content, dtype=np.uint8), cv2.IMREAD_COLOR)
+    if img is None:
+        raise HTTPException(status_code=400, detail="Uploaded file is not a decodable image")
+    try:
+        # cv2 decodes to BGR channel order; reverse the channel axis so that
+        # rgb2gray applies its red/green/blue weights to the right channels.
+        img_gray = rgb2gray(img[..., ::-1])
+        # Convert grayscale to binary using Otsu's threshold
+        _, img_binary = cv2.threshold((img_gray * 255).astype(np.uint8), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        # Invert so that dark pixels become the non-zero foreground that label() tracks
+        img_binary = ~img_binary
+        # Label connected components
+        label_img = label(img_binary)
+        regions = regionprops(label_img)
+        # Filter by area threshold
+        valid_labels = [region.label for region in regions if region.area >= area_threshold]
+        img_filtered = np.isin(label_img, valid_labels)
+        # Save enhanced image to memory
+        output_buffer = BytesIO()
+        plt.imsave(output_buffer, ~img_filtered, cmap="gray", format="png")
+        output_buffer.seek(0)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e
+    # The client may omit the filename entirely; fall back to a fixed stem.
+    stem = (file.filename or "output").rsplit(".", 1)[0]
+    download_name = f"{stem}_enhanced.png"
+    # Header values must be Latin-1 encodable and must not contain raw quotes,
+    # so send a sanitised ASCII fallback plus the exact name percent-encoded
+    # in the filename* parameter.
+    ascii_name = "".join(
+        ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
+        for ch in download_name
+    )
+    encoded_name = quote(download_name, safe="")
+    disposition = f"attachment; filename=\"{ascii_name}\"; filename*=UTF-8''{encoded_name}"
+    # Return the processed image as a StreamingResponse
+    return StreamingResponse(
+        output_buffer,
+        media_type="image/png",
+        headers={"Content-Disposition": disposition}
+    )
+# When the module is executed as a script, serve the app with uvicorn on all
+# interfaces, port 7860 (the same host/port the Dockerfile CMD passes).
+if __name__ == "__main__":
+    from uvicorn import run as serve
+    serve(app, host="0.0.0.0", port=7860)

requirements.txt CHANGED Viewed

@@ -1,2 +1,8 @@
-fastapi
-uvicorn[standard]

+fastapi==0.104.1
+uvicorn==0.24.0
+python-multipart==0.0.6
+PyMuPDF==1.23.7
+opencv-python==4.8.1.78
+numpy==1.26.2
+scikit-image==0.22.0
+matplotlib==3.8.2