import re import numpy as np import cv2 from PIL import Image import random import torch import torchvision.transforms as T from torchvision.transforms.functional import InterpolationMode from difflib import SequenceMatcher from nltk.metrics.distance import edit_distance import nltk # Ensure NLTK data is downloaded try: nltk.data.find('corpora/words.zip') except LookupError: nltk.download('words') try: nltk.data.find('tokenizers/punkt') except LookupError: nltk.download('punkt') from nltk.corpus import words def set_seed(seed=42): random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) # torch.cuda.manual_seed_all(seed) # Uncomment if using GPU torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False def build_transform(input_size=448): mean = (0.485, 0.456, 0.406) std = (0.229, 0.224, 0.225) return T.Compose([ T.Lambda(lambda img: img.convert('RGB')), T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC), T.ToTensor(), T.Normalize(mean=mean, std=std) ]) def get_roi(image_path_or_obj, *roi): """ Extracts ROI from an image path or PIL Image object. """ if isinstance(image_path_or_obj, str): image = Image.open(image_path_or_obj).convert('RGB') else: image = image_path_or_obj.convert('RGB') width, height = image.size roi_x_start = int(width * roi[0]) roi_y_start = int(height * roi[1]) roi_x_end = int(width * roi[2]) roi_y_end = int(height * roi[3]) cropped_image = image.crop((roi_x_start, roi_y_start, roi_x_end, roi_y_end)) return cropped_image def clean_text(text): return re.sub(r'[^a-zA-Z0-9]', '', text).strip().lower() def are_strings_similar(str1, str2, max_distance=3, max_length_diff=2): if str1 == str2: return True if abs(len(str1) - len(str2)) > max_length_diff: return False edit_distance_value = edit_distance(str1, str2) return edit_distance_value <= max_distance def blur_image(image, strength): image_np = np.array(image) blur_strength = int(strength * 50) blur_strength = max(1, blur_strength | 1) blurred_image = cv2.GaussianBlur(image_np, (blur_strength, blur_strength), 0) blurred_pil_image = Image.fromarray(blurred_image) return blurred_pil_image def is_blank(text, limit=15): return len(text) < limit def string_similarity(a, b): return SequenceMatcher(None, a.lower(), b.lower()).ratio() def find_similar_substring(text, keyword, threshold=0.9): text = text.lower() keyword = keyword.lower() if keyword in text: return True keyword_length = len(keyword.split()) words_list = text.split() for i in range(len(words_list) - keyword_length + 1): phrase = ' '.join(words_list[i:i + keyword_length]) similarity = string_similarity(phrase, keyword) if similarity >= threshold: return True return False def destroy_text_roi(image, *roi_params): image_np = np.array(image) h, w, _ = image_np.shape x1 = int(roi_params[0] * w) y1 = int(roi_params[1] * h) x2 = int(roi_params[2] * w) y2 = int(roi_params[3] * h) roi = image_np[y1:y2, x1:x2] blurred_roi = cv2.GaussianBlur(roi, (75, 75), 0) noise = np.random.randint(0, 50, (blurred_roi.shape[0], blurred_roi.shape[1], 3), dtype=np.uint8) noisy_blurred_roi = cv2.add(blurred_roi, noise) image_np[y1:y2, x1:x2] = noisy_blurred_roi return Image.fromarray(image_np) def is_english(text): allowed_pattern = re.compile( r'^[a-zA-Z०-९\u0930\s\.,!?\-;:"\'()]*$' ) return bool(allowed_pattern.match(text)) def is_valid_english(text): english_words = set(words.words()) cleaned_words = ''.join(c.lower() if c.isalnum() else ' ' for c in text).split() return all(word.lower() in english_words for word in cleaned_words)