Spaces:
Sleeping
Sleeping
pawandev
committed on
Commit
·
43ca488
1
Parent(s):
d62425f
Added new PAN model and changed OCR settings and extraction regex to match the new model
Browse files
app/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ def create_app():
|
|
| 10 |
# Load model once
|
| 11 |
app.models = {
|
| 12 |
'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
|
| 13 |
-
'panModel': YOLO('models/
|
| 14 |
}
|
| 15 |
|
| 16 |
return app
|
|
|
|
| 10 |
# Load model once
|
| 11 |
app.models = {
|
| 12 |
'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
|
| 13 |
+
'panModel': YOLO('models/PanModel_v5.pt') # Load additional models as needed
|
| 14 |
}
|
| 15 |
|
| 16 |
return app
|
app/routes/panApi.py
CHANGED
|
@@ -28,6 +28,7 @@ def ocrPan(mode, session):
|
|
| 28 |
img_data = base64.b64decode(imgBuffer)
|
| 29 |
img = Image.open(BytesIO(img_data))
|
| 30 |
img.verify() # Verify image format
|
|
|
|
| 31 |
img = Image.open(io.BytesIO(img_data)) # Re-open image after verification
|
| 32 |
except (base64.binascii.Error, ValueError) as decode_err:
|
| 33 |
return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
|
|
@@ -42,6 +43,7 @@ def ocrPan(mode, session):
|
|
| 42 |
response = session.get(img_url)
|
| 43 |
response.raise_for_status()
|
| 44 |
img = Image.open(BytesIO(response.content))
|
|
|
|
| 45 |
img.verify() # Verify image format
|
| 46 |
img = Image.open(BytesIO(response.content)) # Re-open image after verification
|
| 47 |
|
|
@@ -54,7 +56,7 @@ def ocrPan(mode, session):
|
|
| 54 |
|
| 55 |
# Run detection
|
| 56 |
model = current_app.models.get('panModel')
|
| 57 |
-
results = model.predict(source=img,
|
| 58 |
# print(results,"model result")
|
| 59 |
extracted_data = process_results(results, img)
|
| 60 |
# print(extracted_data, "extracted data")
|
|
|
|
| 28 |
img_data = base64.b64decode(imgBuffer)
|
| 29 |
img = Image.open(BytesIO(img_data))
|
| 30 |
img.verify() # Verify image format
|
| 31 |
+
print(img, "img")
|
| 32 |
img = Image.open(io.BytesIO(img_data)) # Re-open image after verification
|
| 33 |
except (base64.binascii.Error, ValueError) as decode_err:
|
| 34 |
return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
|
|
|
|
| 43 |
response = session.get(img_url)
|
| 44 |
response.raise_for_status()
|
| 45 |
img = Image.open(BytesIO(response.content))
|
| 46 |
+
print(img, "img")
|
| 47 |
img.verify() # Verify image format
|
| 48 |
img = Image.open(BytesIO(response.content)) # Re-open image after verification
|
| 49 |
|
|
|
|
| 56 |
|
| 57 |
# Run detection
|
| 58 |
model = current_app.models.get('panModel')
|
| 59 |
+
results = model.predict(source=img, imgsz=680, iou=0.7, augment=True)
|
| 60 |
# print(results,"model result")
|
| 61 |
extracted_data = process_results(results, img)
|
| 62 |
# print(extracted_data, "extracted data")
|
app/services/panServices/panDataExtractor.py
CHANGED
|
@@ -3,14 +3,14 @@ def filter_array(arr):
|
|
| 3 |
# Define the regex patterns
|
| 4 |
pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
|
| 5 |
pattern_numeric = re.compile(r'^[0-9]+$')
|
| 6 |
-
|
| 7 |
|
| 8 |
# Filter the array
|
| 9 |
filtered_array = [
|
| 10 |
item for item in arr
|
| 11 |
if not (pattern_alphanumeric_special.search(item) or
|
| 12 |
pattern_numeric.match(item) or
|
| 13 |
-
|
| 14 |
]
|
| 15 |
return filtered_array
|
| 16 |
|
|
@@ -20,7 +20,8 @@ def extract_panData(data):
|
|
| 20 |
"VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
|
| 21 |
'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
|
| 22 |
'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
|
| 23 |
-
"Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name'
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
|
|
@@ -47,7 +48,7 @@ def extract_panData(data):
|
|
| 47 |
|
| 48 |
|
| 49 |
# Check and extract PAN number
|
| 50 |
-
pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
|
| 51 |
for item in cleaned_data:
|
| 52 |
if pan_pattern.match(item):
|
| 53 |
result["data"]["panNo"] = item
|
|
|
|
| 3 |
# Define the regex patterns
|
| 4 |
pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
|
| 5 |
pattern_numeric = re.compile(r'^[0-9]+$')
|
| 6 |
+
pattern_special_chars = re.compile(r'[^a-zA-Z.\s]+')
|
| 7 |
|
| 8 |
# Filter the array
|
| 9 |
filtered_array = [
|
| 10 |
item for item in arr
|
| 11 |
if not (pattern_alphanumeric_special.search(item) or
|
| 12 |
pattern_numeric.match(item) or
|
| 13 |
+
pattern_special_chars.search(item))
|
| 14 |
]
|
| 15 |
return filtered_array
|
| 16 |
|
|
|
|
| 20 |
"VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
|
| 21 |
'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
|
| 22 |
'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
|
| 23 |
+
"Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name',"inent Account Number", "anent Account Number C","Permanent Account Number Car",
|
| 24 |
+
'ugr Name']
|
| 25 |
|
| 26 |
|
| 27 |
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
# Check and extract PAN number
|
| 51 |
+
pan_pattern = re.compile(r'^[A-Z]{5}\s*[0-9]{4}\s*[A-Z]$')
|
| 52 |
for item in cleaned_data:
|
| 53 |
if pan_pattern.match(item):
|
| 54 |
result["data"]["panNo"] = item
|
app/services/panServices/panOcr.py
CHANGED
|
@@ -6,7 +6,7 @@ from .panDataExtractor import extract_panData
|
|
| 6 |
|
| 7 |
def process_results(results, img):
|
| 8 |
label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
|
| 9 |
-
confidence_threshold = 0.
|
| 10 |
input_image_format = img.format if img.format else "PNG"
|
| 11 |
valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
|
| 12 |
input_image_format = input_image_format if input_image_format in valid_formats else "PNG"
|
|
|
|
| 6 |
|
| 7 |
def process_results(results, img):
|
| 8 |
label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
|
| 9 |
+
confidence_threshold = 0.4
|
| 10 |
input_image_format = img.format if img.format else "PNG"
|
| 11 |
valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
|
| 12 |
input_image_format = input_image_format if input_image_format in valid_formats else "PNG"
|