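# PRISM2.0 / backend / utils.py
# Page residue captured from the file-hosting view, kept as comments so the
# module parses: "devranx's picture" | "Initial deploy with LFS images and
# audio" | d790e98 | "raw" | "history blame" | 3.95 kB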
import re
import numpy as np
import cv2
from PIL import Image
import random
import torch
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode
from difflib import SequenceMatcher
from nltk.metrics.distance import edit_distance
import nltk
# Make sure the NLTK resources listed below are installed, downloading any
# that cannot be located.
def _ensure_nltk_resource(resource_path, package):
    """Download *package* unless NLTK can already locate *resource_path*."""
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)


_ensure_nltk_resource('corpora/words.zip', 'words')
_ensure_nltk_resource('tokenizers/punkt', 'punkt')
from nltk.corpus import words
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Value handed to ``random.seed``, ``np.random.seed`` and
            ``torch.manual_seed``.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # torch.cuda.manual_seed_all(seed) is not called here; add it when
    # running on GPU if explicit CUDA seeding is wanted.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
def build_transform(input_size=448):
    """Build the preprocessing pipeline applied to an image before inference.

    Args:
        input_size: Side length of the square the image is resized to.

    Returns:
        A ``T.Compose`` that converts the image to RGB, resizes it to
        ``(input_size, input_size)`` with bicubic interpolation, converts it
        to a tensor and normalizes each channel.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    target_size = (input_size, input_size)

    def to_rgb(img):
        return img.convert('RGB')

    steps = [
        T.Lambda(to_rgb),
        T.Resize(target_size, interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=channel_mean, std=channel_std),
    ]
    return T.Compose(steps)
def get_roi(image_path_or_obj, *roi):
    """Crop a region of interest expressed as fractions of the image size.

    Args:
        image_path_or_obj: Either a filesystem path (``str`` or any
            ``os.PathLike`` such as ``pathlib.Path``) or an already opened
            image object exposing ``convert``, ``size`` and ``crop``.
        *roi: Four fractions ``(x_start, y_start, x_end, y_end)``. The x
            values are scaled by the image width and the y values by its
            height; each product is truncated to an int.

    Returns:
        The cropped image, converted to RGB.

    Raises:
        IndexError: If fewer than four ROI values are supplied.
    """
    import os  # local import: only needed for the path-like check below

    if isinstance(image_path_or_obj, (str, os.PathLike)):
        # Close the file handle deterministically; convert() hands back an
        # independent in-memory image, so it stays usable after the `with`.
        with Image.open(image_path_or_obj) as opened:
            image = opened.convert('RGB')
    else:
        image = image_path_or_obj.convert('RGB')

    width, height = image.size
    crop_box = (
        int(width * roi[0]),
        int(height * roi[1]),
        int(width * roi[2]),
        int(height * roi[3]),
    )
    return image.crop(crop_box)
def clean_text(text):
    """Return *text* lowercased with every non-ASCII-alphanumeric character removed."""
    alnum_only = re.sub(r'[^a-zA-Z0-9]', '', text)
    trimmed = alnum_only.strip()
    return trimmed.lower()
def are_strings_similar(str1, str2, max_distance=3, max_length_diff=2):
    """Decide whether two strings are close enough to count as the same.

    Args:
        str1: First string.
        str2: Second string.
        max_distance: Largest edit distance still considered similar.
        max_length_diff: Largest length difference allowed before the
            strings are rejected without computing the edit distance.

    Returns:
        True when the strings are equal, or when their lengths differ by at
        most ``max_length_diff`` and their edit distance is at most
        ``max_distance``; False otherwise.
    """
    if str1 == str2:
        return True

    length_gap = abs(len(str1) - len(str2))
    if length_gap > max_length_diff:
        # Cheap rejection before the more expensive distance computation.
        return False

    return edit_distance(str1, str2) <= max_distance
def blur_image(image, strength):
    """Return a Gaussian-blurred copy of *image* as a PIL image.

    Args:
        image: Image convertible with ``np.array`` (e.g. a PIL image).
        strength: Blur amount; multiplied by 50 and truncated to derive the
            square kernel size.

    Returns:
        A new PIL image built from the blurred pixel array.
    """
    pixels = np.array(image)

    # OR-ing with 1 forces an odd value and the max() keeps it at least 1,
    # so the kernel size passed to GaussianBlur is always a positive odd int.
    kernel = max(1, int(strength * 50) | 1)

    blurred = cv2.GaussianBlur(pixels, (kernel, kernel), 0)
    return Image.fromarray(blurred)
def is_blank(text, limit=15):
    """Report whether *text* has fewer than *limit* characters."""
    return limit > len(text)
def string_similarity(a, b):
    """Return the case-insensitive ``SequenceMatcher`` ratio of *a* and *b*.

    The result is a float in [0, 1]; 1.0 means the lowercased strings match.
    """
    lowered_a, lowered_b = a.lower(), b.lower()
    matcher = SequenceMatcher(None, lowered_a, lowered_b)
    return matcher.ratio()
def find_similar_substring(text, keyword, threshold=0.9):
    """Check whether *keyword* occurs in *text* exactly or approximately.

    Both inputs are lowercased. An exact substring hit returns True
    immediately. Otherwise every run of consecutive whitespace-separated
    words in *text* that has as many words as *keyword* is compared with
    ``string_similarity``.

    Args:
        text: Text to search.
        keyword: Word or phrase to look for.
        threshold: Minimum similarity ratio for a window to count as a match.

    Returns:
        True if an exact or sufficiently similar match is found, else False.
    """
    haystack = text.lower()
    needle = keyword.lower()
    if needle in haystack:
        return True

    tokens = haystack.split()
    window_size = len(needle.split())
    # Yields nothing when the text has fewer words than the keyword.
    windows = (
        ' '.join(tokens[start:start + window_size])
        for start in range(len(tokens) - window_size + 1)
    )
    return any(string_similarity(window, needle) >= threshold for window in windows)
def destroy_text_roi(image, *roi_params):
    """Blur a rectangular region heavily and add random noise on top of it.

    Args:
        image: Image convertible with ``np.array``; the array is unpacked as
            (height, width, channels).
        *roi_params: Four fractions ``(x1, y1, x2, y2)``; x values are scaled
            by the width and y values by the height, then truncated to ints.

    Returns:
        A new PIL image in which the region has been replaced by its
        blurred, noised version.
    """
    pixels = np.array(image)
    height, width, _ = pixels.shape

    left = int(roi_params[0] * width)
    top = int(roi_params[1] * height)
    right = int(roi_params[2] * width)
    bottom = int(roi_params[3] * height)
    region = (slice(top, bottom), slice(left, right))

    smoothed = cv2.GaussianBlur(pixels[region], (75, 75), 0)

    # Per-pixel uint8 noise in [0, 50) with three channels, sized to the region.
    rows, cols = smoothed.shape[0], smoothed.shape[1]
    noise = np.random.randint(0, 50, (rows, cols, 3), dtype=np.uint8)

    pixels[region] = cv2.add(smoothed, noise)
    return Image.fromarray(pixels)
def is_english(text):
    """Report whether *text* consists only of the allowed character set.

    Allowed: ASCII letters, whitespace, the punctuation ``. , ! ? - ; : " ' ( )``,
    the Devanagari digits U+0966..U+096F and the character U+0930. The empty
    string is accepted.

    NOTE(review): ASCII digits 0-9 are rejected while Devanagari digits are
    accepted -- confirm this is intended for a function named ``is_english``.
    """
    allowed = r'^[a-zA-Z०-९\u0930\s\.,!?\-;:"\'()]*$'
    return re.match(allowed, text) is not None
# Lazily built lookup of the NLTK `words` corpus; filled on first use so the
# corpus is converted to a set once rather than on every call.
_ENGLISH_WORDS_CACHE = None


def _english_vocabulary():
    """Return the cached set of entries from the NLTK ``words`` corpus."""
    global _ENGLISH_WORDS_CACHE
    if _ENGLISH_WORDS_CACHE is None:
        _ENGLISH_WORDS_CACHE = frozenset(words.words())
    return _ENGLISH_WORDS_CACHE


def is_valid_english(text):
    """Check that every word in *text* appears in the NLTK ``words`` corpus.

    Non-alphanumeric characters act as separators and the remaining
    characters are lowercased before lookup. Corpus entries are compared
    verbatim, so an entry that exists only in capitalized form cannot match.

    Args:
        text: String to validate.

    Returns:
        True if every extracted token is in the corpus (including when there
        are no tokens at all), otherwise False.
    """
    tokens = ''.join(ch.lower() if ch.isalnum() else ' ' for ch in text).split()
    if not tokens:
        # all() over nothing is True; skip loading the corpus entirely.
        return True

    vocabulary = _english_vocabulary()
    return all(token in vocabulary for token in tokens)