Spaces:

devranx
/

PRISM2.0

Sleeping

App Files Files Community

PRISM2.0 / backend /utils.py

devranx

Initial deploy with LFS images and audio

d790e98 25 days ago

raw

history blame contribute delete

3.95 kB

	import functools
	import os
	import random
	import re
	from difflib import SequenceMatcher

	import cv2
	import nltk
	import numpy as np
	import torch
	import torchvision.transforms as T
	from nltk.metrics.distance import edit_distance
	from PIL import Image
	from torchvision.transforms.functional import InterpolationMode

	# Fetch each required NLTK resource only when it cannot be found locally.
	for _resource_path, _package_name in (
	    ('corpora/words.zip', 'words'),
	    ('tokenizers/punkt', 'punkt'),
	):
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        # nltk.data.find raises LookupError when the resource is missing.
	        nltk.download(_package_name)

	from nltk.corpus import words

	def set_seed(seed=42):
	    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

	    Args:
	        seed: Value passed to each of the three seeding functions
	            (default 42).
	    """
	    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
	        seed_fn(seed)

	    # For GPU runs, torch.cuda.manual_seed_all(seed) can be added here.
	    cudnn = torch.backends.cudnn
	    cudnn.benchmark = False
	    cudnn.deterministic = True

	def build_transform(input_size=448):
	    """Build the image preprocessing pipeline.

	    The pipeline converts the input to RGB, resizes it to a square of
	    ``input_size`` pixels with bicubic interpolation, turns it into a
	    tensor and normalizes each channel.

	    Args:
	        input_size: Side length of the resized square (default 448).

	    Returns:
	        A ``T.Compose`` chaining the four steps in that order.
	    """
	    # Per-channel statistics; presumably the usual ImageNet values.
	    channel_mean = (0.485, 0.456, 0.406)
	    channel_std = (0.229, 0.224, 0.225)
	    target_size = (input_size, input_size)

	    steps = [
	        T.Lambda(lambda picture: picture.convert('RGB')),
	        T.Resize(target_size, interpolation=InterpolationMode.BICUBIC),
	        T.ToTensor(),
	        T.Normalize(mean=channel_mean, std=channel_std),
	    ]
	    return T.Compose(steps)

	def get_roi(image_path_or_obj, *roi):
	    """Crop a fractional region of interest out of an image.

	    Args:
	        image_path_or_obj: Filesystem path of the image (``str`` or any
	            ``os.PathLike`` such as ``pathlib.Path``), or an object with a
	            PIL-style ``convert`` method (an already loaded image).
	        *roi: ``x_start, y_start, x_end, y_end`` expressed as fractions of
	            the image width/height. Each is scaled to pixels and truncated
	            with ``int``. Values beyond the fourth are ignored.

	    Returns:
	        The cropped region, taken from the RGB-converted image.

	    Raises:
	        IndexError: If fewer than four ROI values are supplied.
	    """
	    if isinstance(image_path_or_obj, (str, os.PathLike)):
	        # Close the file as soon as the pixels are converted rather than
	        # leaving the handle to the garbage collector.
	        with Image.open(image_path_or_obj) as opened:
	            image = opened.convert('RGB')
	    else:
	        image = image_path_or_obj.convert('RGB')

	    width, height = image.size
	    crop_box = (
	        int(width * roi[0]),
	        int(height * roi[1]),
	        int(width * roi[2]),
	        int(height * roi[3]),
	    )
	    return image.crop(crop_box)

	def clean_text(text):
	    """Return ``text`` lower-cased with all non ``[a-zA-Z0-9]`` characters removed."""
	    # The substitution already removes every whitespace character, so no
	    # separate trimming step is needed afterwards.
	    alnum_only = re.sub(r'[^a-zA-Z0-9]', '', text)
	    return alnum_only.lower()

	def are_strings_similar(str1, str2, max_distance=3, max_length_diff=2):
	    """Decide whether two strings are close enough to count as the same.

	    Args:
	        str1: First string.
	        str2: Second string.
	        max_distance: Largest edit distance still considered similar.
	        max_length_diff: Largest length difference still considered similar.

	    Returns:
	        True when the strings are equal, or when their lengths differ by at
	        most ``max_length_diff`` and their edit distance is at most
	        ``max_distance``; False otherwise.
	    """
	    if str1 == str2:
	        return True

	    length_gap = abs(len(str1) - len(str2))
	    # The length test runs first so the edit distance is only computed
	    # for candidates of comparable length.
	    return (
	        length_gap <= max_length_diff
	        and edit_distance(str1, str2) <= max_distance
	    )

	def blur_image(image, strength):
	    """Return a Gaussian-blurred copy of an image.

	    Args:
	        image: Image accepted by ``np.array`` (e.g. a PIL image).
	        strength: Blur amount; the kernel side length is
	            ``int(strength * 50)`` forced to an odd value of at least 1.

	    Returns:
	        A new PIL image built from the blurred array.
	    """
	    pixels = np.array(image)

	    # cv2.GaussianBlur needs a positive, odd kernel size: OR-ing with 1
	    # rounds even values up to the next odd one, and max() clamps
	    # non-positive results to 1.
	    kernel_size = max(1, int(strength * 50) | 1)

	    blurred = cv2.GaussianBlur(pixels, (kernel_size, kernel_size), 0)
	    return Image.fromarray(blurred)

	def is_blank(text, limit=15):
	    """Report whether ``text`` has fewer than ``limit`` characters.

	    Args:
	        text: String (or any sized object) to measure.
	        limit: Minimum length that is no longer considered blank (default 15).

	    Returns:
	        True when ``len(text)`` is below ``limit``.
	    """
	    return limit > len(text)

	def string_similarity(a, b):
	    """Return the case-insensitive ``SequenceMatcher`` ratio of two strings.

	    Args:
	        a: First string.
	        b: Second string.

	    Returns:
	        A float between 0.0 and 1.0; 1.0 means the lower-cased strings are
	        identical.
	    """
	    matcher = SequenceMatcher(None, a.lower(), b.lower())
	    return matcher.ratio()

	def find_similar_substring(text, keyword, threshold=0.9):
	    """Check whether ``keyword`` occurs in ``text`` exactly or approximately.

	    Both inputs are lower-cased. An exact substring hit returns True
	    immediately. Otherwise every run of consecutive words in ``text`` that
	    has as many words as ``keyword`` is compared with ``keyword`` through
	    ``string_similarity``.

	    Args:
	        text: Text to search in.
	        keyword: Word or phrase to look for.
	        threshold: Minimum similarity for an approximate match (default 0.9).

	    Returns:
	        True on an exact or sufficiently similar match, otherwise False.
	    """
	    haystack = text.lower()
	    needle = keyword.lower()
	    if needle in haystack:
	        return True

	    tokens = haystack.split()
	    window = len(needle.split())
	    # Slide a window of `window` words across the text; any() stops at
	    # the first phrase that reaches the threshold.
	    return any(
	        string_similarity(' '.join(tokens[start:start + window]), needle) >= threshold
	        for start in range(len(tokens) - window + 1)
	    )

	def destroy_text_roi(image, *roi_params):
	    """Obscure a fractional region of an image with heavy blur plus noise.

	    Args:
	        image: Image accepted by ``np.array`` (e.g. a PIL image). The noise
	            is generated as ``uint8``, so 8-bit pixel data is assumed.
	        *roi_params: ``x1, y1, x2, y2`` as fractions of the image
	            width/height; each is scaled to pixels and truncated with
	            ``int``.

	    Returns:
	        A new PIL image in which the region has been blurred and had random
	        noise added. An empty region leaves the pixels unchanged.

	    Raises:
	        IndexError: If fewer than four ROI values are supplied.
	    """
	    image_np = np.array(image)

	    # Only the first two axes are spatial; reading them this way also
	    # supports 2-D (grayscale) arrays, which have no channel axis.
	    h, w = image_np.shape[:2]
	    x1 = int(roi_params[0] * w)
	    y1 = int(roi_params[1] * h)
	    x2 = int(roi_params[2] * w)
	    y2 = int(roi_params[3] * h)

	    roi = image_np[y1:y2, x1:x2]
	    if roi.size == 0:
	        # cv2.GaussianBlur rejects empty input, and there is nothing to
	        # obscure anyway.
	        return Image.fromarray(image_np)

	    blurred_roi = cv2.GaussianBlur(roi, (75, 75), 0)
	    # Shape the noise like the region itself so any channel count works
	    # (for 3-channel input this is the same shape as before).
	    noise = np.random.randint(0, 50, blurred_roi.shape, dtype=np.uint8)
	    # cv2.add saturates at 255 instead of wrapping around.
	    noisy_blurred_roi = cv2.add(blurred_roi, noise)
	    image_np[y1:y2, x1:x2] = noisy_blurred_roi
	    return Image.fromarray(image_np)

	def is_english(text):
	    """Check that ``text`` consists only of the allowed character set.

	    Allowed are ASCII letters, whitespace, the punctuation
	    ``. , ! ? - ; : " ' ( )``, the Devanagari digits U+0966-U+096F and the
	    character U+0930. ASCII digits are not part of the set. An empty
	    string is accepted.

	    Args:
	        text: String to validate.

	    Returns:
	        True when every character of ``text`` is in the allowed set.
	    """
	    return re.match(
	        r'^[a-zA-Z०-९\u0930\s\.,!?\-;:"\'()]*$',
	        text,
	    ) is not None

	@functools.lru_cache(maxsize=None)
	def _english_vocabulary():
	    """Return the NLTK word list as a frozenset, built once per process."""
	    return frozenset(words.words())

	def is_valid_english(text):
	    """Check that every word of ``text`` appears in the NLTK word list.

	    Non-alphanumeric characters are treated as separators and the remaining
	    characters are lower-cased before the lookup.

	    Args:
	        text: String to validate.

	    Returns:
	        True when all extracted words are in the word list. Text that
	        yields no words (empty or punctuation only) returns True.
	    """
	    tokens = ''.join(c.lower() if c.isalnum() else ' ' for c in text).split()
	    if not tokens:
	        return True

	    # The word set is large, so it is cached instead of being rebuilt on
	    # every call.
	    # NOTE(review): tokens are lower-cased but the word list is used as-is;
	    # any capitalized entries in it can never match - confirm intended.
	    vocabulary = _english_vocabulary()
	    return all(token in vocabulary for token in tokens)