Spaces:

pawanmau01
/

TestAPI

Sleeping

pawandev committed on Jul 19, 2024

Commit

43ca488

1 Parent(s): d62425f

Added new PAN model and changed OCR settings and extraction regex as per the new model

Files changed (4) hide show

app/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ def create_app():
         # Load model once
         app.models = {
             'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
-            'panModel': YOLO('models/PanYolo_v4.pt')  # Load additional models as needed
         }
     return app

         # Load model once
         app.models = {
             'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
+            'panModel': YOLO('models/PanModel_v5.pt')  # Load additional models as needed
         }
     return app

app/routes/panApi.py CHANGED Viewed

@@ -28,6 +28,7 @@ def ocrPan(mode, session):
                 img_data = base64.b64decode(imgBuffer)
                 img = Image.open(BytesIO(img_data))
                 img.verify()  # Verify image format
                 img = Image.open(io.BytesIO(img_data))  # Re-open image after verification
             except (base64.binascii.Error, ValueError) as decode_err:
                 return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
@@ -42,6 +43,7 @@ def ocrPan(mode, session):
             response = session.get(img_url)
             response.raise_for_status()
             img = Image.open(BytesIO(response.content))
             img.verify()  # Verify image format
             img = Image.open(BytesIO(response.content))  # Re-open image after verification
@@ -54,7 +56,7 @@ def ocrPan(mode, session):
         # Run detection
         model = current_app.models.get('panModel')
-        results = model.predict(source=img, save=False)
         # print(results,"model result")
         extracted_data = process_results(results, img)
         # print(extracted_data, "extracted data")

                 img_data = base64.b64decode(imgBuffer)
                 img = Image.open(BytesIO(img_data))
                 img.verify()  # Verify image format
+                print(img, "img")
                 img = Image.open(io.BytesIO(img_data))  # Re-open image after verification
             except (base64.binascii.Error, ValueError) as decode_err:
                 return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
             response = session.get(img_url)
             response.raise_for_status()
             img = Image.open(BytesIO(response.content))
+            print(img, "img")
             img.verify()  # Verify image format
             img = Image.open(BytesIO(response.content))  # Re-open image after verification
         # Run detection
         model = current_app.models.get('panModel')
+        results = model.predict(source=img, imgsz=680, iou=0.7, augment=True)
         # print(results,"model result")
         extracted_data = process_results(results, img)
         # print(extracted_data, "extracted data")

app/services/panServices/panDataExtractor.py CHANGED Viewed

@@ -3,14 +3,14 @@ def filter_array(arr):
     # Define the regex patterns
     pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
     pattern_numeric = re.compile(r'^[0-9]+$')
-    pattern_non_alpha = re.compile(r'[^.\s]*[^a-zA-Z\s][^.\s]*')
     # Filter the array
     filtered_array = [
         item for item in arr
         if not (pattern_alphanumeric_special.search(item) or
                 pattern_numeric.match(item) or
-                pattern_non_alpha.search(item))
     ]
     return filtered_array
@@ -20,7 +20,8 @@ def extract_panData(data):
                       "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
                       'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
                       'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
-                      "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name']
@@ -47,7 +48,7 @@ def extract_panData(data):
     # Check and extract PAN number
-    pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
     for item in cleaned_data:
         if pan_pattern.match(item):
             result["data"]["panNo"] = item

     # Define the regex patterns
     pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
     pattern_numeric = re.compile(r'^[0-9]+$')
+    pattern_special_chars = re.compile(r'[^a-zA-Z.\s]+')
     # Filter the array
     filtered_array = [
         item for item in arr
         if not (pattern_alphanumeric_special.search(item) or
                 pattern_numeric.match(item) or
+                pattern_special_chars.search(item))
     ]
     return filtered_array
                       "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
                       'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
                       'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
+                      "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name',"inent Account Number", "anent Account Number C","Permanent Account Number Car",
+                      'ugr Name']
     # Check and extract PAN number
+    pan_pattern = re.compile(r'^[A-Z]{5}\s*[0-9]{4}\s*[A-Z]$')
     for item in cleaned_data:
         if pan_pattern.match(item):
             result["data"]["panNo"] = item

app/services/panServices/panOcr.py CHANGED Viewed

@@ -6,7 +6,7 @@ from .panDataExtractor import extract_panData
 def process_results(results, img):
     label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
-    confidence_threshold = 0.3
     input_image_format = img.format if img.format else "PNG"
     valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
     input_image_format = input_image_format if input_image_format in valid_formats else "PNG"

 def process_results(results, img):
     label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
+    confidence_threshold = 0.4
     input_image_format = img.format if img.format else "PNG"
     valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
     input_image_format = input_image_format if input_image_format in valid_formats else "PNG"