# Utility helpers for image preprocessing and OCR text matching.
import re
import numpy as np
import cv2
from PIL import Image
import random
import torch
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode
from difflib import SequenceMatcher
from nltk.metrics.distance import edit_distance
import nltk
# Make sure the NLTK resources used below are present, downloading on demand.
for _resource, _package in (('corpora/words.zip', 'words'),
                            ('tokenizers/punkt', 'punkt')):
    try:
        nltk.data.find(_resource)
    except LookupError:
        nltk.download(_package)
from nltk.corpus import words
def set_seed(seed=42):
    """Seed the Python, NumPy, and Torch RNGs for reproducible runs."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # torch.cuda.manual_seed_all(seed) # Uncomment if using GPU
    # Force deterministic cuDNN kernels; disable autotuned algorithm selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
def build_transform(input_size=448):
    """Return a torchvision pipeline: RGB-convert, bicubic-resize to a
    square of side ``input_size``, to-tensor, ImageNet-normalize."""
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)
    steps = [
        T.Lambda(lambda img: img.convert('RGB')),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    return T.Compose(steps)
def get_roi(image_path_or_obj, *roi):
    """Crop a fractional ROI from an image.

    ``roi`` is (x1, y1, x2, y2) as fractions of width/height in [0, 1];
    the first argument is either a file path or a PIL Image.
    """
    if isinstance(image_path_or_obj, str):
        image = Image.open(image_path_or_obj).convert('RGB')
    else:
        image = image_path_or_obj.convert('RGB')
    w, h = image.size
    left = int(w * roi[0])
    upper = int(h * roi[1])
    right = int(w * roi[2])
    lower = int(h * roi[3])
    return image.crop((left, upper, right, lower))
def clean_text(text):
    """Lowercase ``text`` and drop every non-alphanumeric character.

    The substitution leaves only [a-zA-Z0-9], so no whitespace can survive;
    the original trailing ``.strip()`` was a no-op and has been removed.
    """
    return re.sub(r'[^a-zA-Z0-9]', '', text).lower()
def are_strings_similar(str1, str2, max_distance=3, max_length_diff=2):
    """True when the strings are identical, or close in length and within
    ``max_distance`` edit operations of each other."""
    if str1 == str2:
        return True
    length_gap = abs(len(str1) - len(str2))
    if length_gap > max_length_diff:
        return False
    return edit_distance(str1, str2) <= max_distance
def blur_image(image, strength):
    """Gaussian-blur a PIL image; ``strength`` (roughly [0, 1]) scales the
    kernel size, which is forced to be odd and at least 1."""
    pixels = np.array(image)
    kernel = int(strength * 50)
    kernel = max(1, kernel | 1)  # OpenCV requires an odd, positive kernel size
    blurred = cv2.GaussianBlur(pixels, (kernel, kernel), 0)
    return Image.fromarray(blurred)
def is_blank(text, limit=15):
    """Treat any string shorter than ``limit`` characters as effectively blank."""
    return limit > len(text)
def string_similarity(a, b):
    """Case-insensitive similarity ratio in [0, 1] via difflib."""
    matcher = SequenceMatcher(None, a.lower(), b.lower())
    return matcher.ratio()
def find_similar_substring(text, keyword, threshold=0.9):
    """True if ``keyword`` occurs in ``text`` (case-insensitive), either as an
    exact substring or as a word n-gram whose similarity meets ``threshold``."""
    text = text.lower()
    keyword = keyword.lower()
    if keyword in text:
        return True
    n = len(keyword.split())
    tokens = text.split()
    # Slide an n-word window across the text and fuzzy-compare each window.
    windows = (' '.join(tokens[i:i + n]) for i in range(len(tokens) - n + 1))
    return any(string_similarity(window, keyword) >= threshold for window in windows)
def destroy_text_roi(image, *roi_params):
    """Make text in a fractional ROI unreadable via heavy Gaussian blur
    plus additive random noise; returns a new PIL image.

    ``roi_params`` is (x1, y1, x2, y2) as fractions of width/height.
    """
    frame = np.array(image)
    h, w, _ = frame.shape
    x1, y1 = int(roi_params[0] * w), int(roi_params[1] * h)
    x2, y2 = int(roi_params[2] * w), int(roi_params[3] * h)
    region = frame[y1:y2, x1:x2]
    smeared = cv2.GaussianBlur(region, (75, 75), 0)
    grain = np.random.randint(0, 50, (smeared.shape[0], smeared.shape[1], 3), dtype=np.uint8)
    # cv2.add saturates at 255 instead of wrapping around.
    frame[y1:y2, x1:x2] = cv2.add(smeared, grain)
    return Image.fromarray(frame)
def is_english(text):
    """True if ``text`` contains only characters from the allowed set
    (ASCII letters, whitespace, and common punctuation).

    NOTE(review): despite the name, the character class also admits
    Devanagari digits (०-९) and U+0930 — confirm whether that is intentional.
    """
    allowed_pattern = re.compile(
        r'^[a-zA-Z०-९\u0930\s\.,!?\-;:"\'()]*$'
    )
    return allowed_pattern.match(text) is not None
def is_valid_english(text):
    """True when every alphanumeric token in ``text`` is in the NLTK
    ``words`` corpus (case-insensitive).

    Non-alphanumeric characters act as token separators. An input with no
    tokens is vacuously valid (matches the original ``all([])`` behavior).
    """
    tokens = ''.join(c.lower() if c.isalnum() else ' ' for c in text).split()
    if not tokens:
        # Vacuously valid; also avoids loading the corpus for empty input.
        return True
    # Building set(words.words()) is expensive (~236k entries); the original
    # rebuilt it on every call. Cache it on the function after first use.
    vocab = getattr(is_valid_english, '_vocab', None)
    if vocab is None:
        vocab = is_valid_english._vocab = set(words.words())
    # Tokens are already lowercased above, so no per-token .lower() needed.
    return all(token in vocab for token in tokens)
|