Spaces:

devranx
/

PRISM2.0

Running

App Files Files Community

PRISM2.0 / backend /modules /text_checks.py

devranx

Initial deploy with LFS images and audio

d790e98 26 days ago

raw

history blame contribute delete

4.23 kB

	import re
	import emoji
	from PIL import Image
	from backend import config
	from backend.utils import get_roi, clean_text, are_strings_similar, blur_image, is_blank, is_english, is_valid_english, destroy_text_roi
	from backend.model_handler import model_handler

	def is_unreadable_tagline(htag, tag):
	    """Report whether two readings of a tagline disagree.

	    Both strings are normalised with ``clean_text`` (``htag`` first,
	    then ``tag``) and compared with ``are_strings_similar``.

	    Args:
	        htag: Tagline text read from the second (comparison) image.
	        tag: Tagline text read from the primary image.

	    Returns:
	        True when the cleaned strings are NOT considered similar.
	    """
	    normalised = [clean_text(text) for text in (htag, tag)]
	    return not are_strings_similar(*normalised)

	def is_hyperlink_tagline(tag):
	    """Return True when the tagline text contains a link-like fragment.

	    The check is a plain substring search for 'www', '.com' or 'http'.
	    It is case-insensitive so that text which has not been lowercased by
	    the caller (e.g. 'WWW.SHOP.COM') is still detected.

	    Args:
	        tag: Tagline text to inspect.

	    Returns:
	        True if any of the link markers occurs in ``tag``, else False.
	    """
	    markers = ('www', '.com', 'http')
	    lowered = tag.lower()
	    return any(marker in lowered for marker in markers)

	def is_price_tagline(tag):
	    """Return True when the tagline appears to quote a price.

	    Decision order:
	      1. If any large-amount keyword ('crore', 'thousand', ...) occurs
	         anywhere in ``tag``, the text is not treated as a price.
	      2. If a currency amount is followed by a magnitude unit
	         (lac/lacs/lakh/lakhs/cr/k), it is not treated as a price.
	      3. Otherwise the text is a price when it contains a currency marker
	         (₹, 'rs', $ or र) followed by digits.

	    Matching is case-sensitive and the patterns are lowercase, so callers
	    are expected to pass lowercased text.

	    Args:
	        tag: Tagline text to inspect.

	    Returns:
	        True if a price-like amount is found and no exclusion applies.
	    """
	    exclude_keywords = ("crore", "thousand", "million", "billion", "trillion")
	    # The alternatives must be joined with a bare '|'. An escaped '\|'
	    # matches a literal pipe character, which turns the whole pattern into
	    # one long literal sequence that real taglines never contain.
	    exclude_pattern = (
	        r'(₹\.?\s?\d+\s(lac|lacs|lakh|lakhs|cr|k))'
	        r'|(\brs\.?\s?\d+\s(lac|lacs|lakh|lakhs|cr|k))'
	        r'|(\$\.?\s?\d+\s*(lac|lacs|lakh|lakhs|cr|k))'
	    )
	    price_pattern = r'(₹\s?\d+)|(\brs\.?\s?\d+)|(\$\s?\d+)|(र\d+)'

	    if any(keyword in tag for keyword in exclude_keywords):
	        return False
	    if re.search(exclude_pattern, tag):
	        return False
	    return bool(re.search(price_pattern, tag))

	def is_multiple_emoji(emoji_text):
	    """Decide whether a model response reports more than one emoji.

	    The response is split on whitespace and only its last word is
	    inspected: a final word of '0' or '1' means "not excessive"; any
	    other final word is treated as excessive.

	    Args:
	        emoji_text: Text returned for the emoji prompt.

	    Returns:
	        False when the last word is '0' or '1', or when the response
	        contains no words at all; True otherwise.
	    """
	    words = emoji_text.split()
	    if not words:
	        # An empty or whitespace-only response carries no count. Indexing
	        # words[-1] here used to raise IndexError, so report "not excessive".
	        return False
	    return words[-1] not in ('0', '1')

	def is_incomplete_tagline(tag, is_eng):
	    """Return True when the tagline ends in a way treated as truncated.

	    Emojis are removed and surrounding whitespace is stripped first.
	    The text then counts as incomplete when it ends with two or more
	    dots, or, for text flagged as non-English, when it ends with a
	    single dot.

	    Args:
	        tag: Tagline text.
	        is_eng: Truthy when the tagline was classified as English.

	    Returns:
	        True if the ending matches one of the rules above, else False.
	    """
	    stripped = emoji.replace_emoji(tag, '').strip()
	    ends_with_dots = stripped.endswith(('...', '..'))
	    return ends_with_dots or (not is_eng and stripped.endswith('.'))

	def tagline(image_path):
	# Run the tagline checks on one image and return a dict of 0/1 flags,
	# one per key initialised in `results` below.
	#
	# NOTE(review): this copy of the file has lost its nesting indentation.
	# In particular it cannot be determined from here whether the `htag`
	# comparison further down belongs inside the `else:` branch or runs for
	# every image - confirm against the original file before re-indenting.
	results = {
	"Empty/Illegible/Black Tagline": 0,
	"Multiple Taglines": 0,
	"Incomplete Tagline": 0,
	"Hyperlink": 0,
	"Price Tag": 0,
	"Excessive Emojis": 0
	}

	# Crop the region described by config.TAG and build a second version of
	# it via blur_image(image, 0.3); both are OCR'd and lowercased/stripped.
	image = get_roi(image_path, *config.TAG)
	himage = blur_image(image, 0.3)
	easytag = model_handler.easyocr_ocr(image).lower().strip()
	unr = model_handler.easyocr_ocr(himage).lower().strip()

	# If either OCR reading is blank, flag the tagline and stop early: the
	# remaining checks are skipped and their flags stay 0.
	if is_blank(easytag) or is_blank(unr):
	results["Empty/Illegible/Black Tagline"] = 1
	return results

	# Text not classified as English keeps the OCR reading as `tag`;
	# otherwise `tag` is re-read with model_handler.intern and lowercased.
	is_eng = is_english(easytag)
	if not is_eng:
	# The flag is already 0 from the initialiser; this re-assigns the same value.
	results["Empty/Illegible/Black Tagline"] = 0
	tag = easytag
	else:
	# `Tag` is only used to derive the lowercased `tag` on the next line.
	Tag = model_handler.intern(image, config.PTAG, 25).strip()
	tag = Tag.lower()

	# Read the second image with the same prompt and flag the tagline when
	# the two readings are not similar (see is_unreadable_tagline).
	htag = model_handler.intern(himage, config.PTAG, 25).lower().strip()
	if is_unreadable_tagline(htag, tag):
	results["Empty/Illegible/Black Tagline"] = 1

	# Text-only checks on the chosen tagline string.
	results["Incomplete Tagline"] = 1 if is_incomplete_tagline(tag, is_eng) else 0
	results["Hyperlink"] = 1 if is_hyperlink_tagline(tag) else 0
	results["Price Tag"] = 1 if is_price_tagline(tag) else 0

	# Any non-blank OCR text in the config.DTAG region sets "Multiple Taglines".
	imagedt = get_roi(image_path, *config.DTAG)
	dtag = model_handler.easyocr_ocr(imagedt).strip()
	results["Multiple Taglines"] = 0 if is_blank(dtag) else 1

	# Query the model with config.PEMO on the tagline crop; is_multiple_emoji
	# interprets the last word of the response.
	emoji_resp = model_handler.intern(image, config.PEMO, 100)
	results["Excessive Emojis"] = 1 if is_multiple_emoji(emoji_resp) else 0

	return results

	def cta(image_path):
	    """Flag a call-to-action region whose text fails any of the checks.

	    The text is read from the ``config.CTA`` region with
	    ``model_handler.intern`` and is reported as bad when it:
	      * contains a dot,
	      * contains an emoji character,
	      * is classified as English and its cleaned form has 2 or fewer
	        characters, or
	      * has a cleaned form longer than 15 characters.

	    Args:
	        image_path: Path of the image to inspect.

	    Returns:
	        ``{"Bad CTA": 1}`` when any rule fires, otherwise ``{"Bad CTA": 0}``.
	    """
	    region = get_roi(image_path, *config.CTA)
	    cta_text = model_handler.intern(region, config.PTAG, 5).strip()
	    # NOTE(review): the result of is_valid_english is never used by the
	    # rules below; the call is retained so behaviour stays unchanged.
	    is_valid_english(cta_text)
	    looks_english = is_english(cta_text)

	    # Cheap character-level rules first; short-circuits in the original order.
	    flagged = (
	        any(dots in cta_text for dots in ('.', '..', '...'))
	        or any(emoji.is_emoji(ch) for ch in cta_text)
	    )
	    if not flagged:
	        cleaned_length = len(clean_text(cta_text))
	        flagged = (looks_english and cleaned_length <= 2) or cleaned_length > 15

	    return {"Bad CTA": 1 if flagged else 0}

	def tnc(image_path):
	    """Report whether any text is present in the ``config.TNC`` region.

	    The region is OCR'd with ``model_handler.easyocr_ocr`` and the result
	    is normalised with ``clean_text`` before the blank test.

	    Args:
	        image_path: Path of the image to inspect.

	    Returns:
	        ``{"Terms & Conditions": 1}`` when the cleaned text is not blank,
	        otherwise ``{"Terms & Conditions": 0}``.
	    """
	    region = get_roi(image_path, *config.TNC)
	    cleaned = clean_text(model_handler.easyocr_ocr(region))
	    if is_blank(cleaned):
	        return {"Terms & Conditions": 0}
	    return {"Terms & Conditions": 1}

	def tooMuchText(image_path):
	    """Flag an image whose remaining OCR text exceeds 55 characters.

	    The image is opened as RGB, three fixed regions are passed through
	    ``destroy_text_roi`` in sequence, and the result is OCR'd. The
	    lowercased, stripped OCR string is then measured.

	    Args:
	        image_path: Path of the image to inspect.

	    Returns:
	        ``{"Too Much Text": 1}`` when the OCR text is longer than 55
	        characters, otherwise ``{"Too Much Text": 0}``.
	    """
	    # Regions handed to destroy_text_roi, applied in this order.
	    # presumably fractional (left, top, right, bottom) boxes - confirm
	    # against destroy_text_roi.
	    masked_regions = (
	        (0.04, 0.625, 1.0, 0.677),
	        (0, 0, 1.0, 0.25),
	        (0, 0.85, 1.0, 1),
	    )

	    canvas = Image.open(image_path).convert('RGB')
	    for region in masked_regions:
	        canvas = destroy_text_roi(canvas, *region)

	    body_text = model_handler.easyocr_ocr(canvas).lower().strip()
	    if len(body_text) > 55:
	        return {"Too Much Text": 1}
	    return {"Too Much Text": 0}