|
|
import re
|
|
|
import emoji
|
|
|
from PIL import Image
|
|
|
import config
|
|
|
from utils import get_roi, clean_text, are_strings_similar, blur_image, is_blank, is_english, is_valid_english, destroy_text_roi
|
|
|
from model_handler import model_handler
|
|
|
|
|
|
def is_unreadable_tagline(htag, tag):
    """Report whether two readings of the same tagline disagree.

    Both strings are normalised with ``clean_text`` and then compared with
    ``are_strings_similar``; the tagline counts as unreadable when that
    comparison fails.

    Args:
        htag: First reading of the tagline (``tagline`` passes the text read
            from the blurred crop).
        tag: Second reading of the tagline (``tagline`` passes the text read
            from the unmodified crop).

    Returns:
        The negation of ``are_strings_similar`` applied to the cleaned
        strings.
    """
    # Clean in the same order as the arguments: htag first, then tag.
    normalised = [clean_text(reading) for reading in (htag, tag)]
    readings_match = are_strings_similar(*normalised)
    return not readings_match
|
|
|
|
|
|
def is_hyperlink_tagline(tag):
    """Return True when the tagline appears to contain a web address.

    The check is a plain substring search for ``www``, ``.com`` or ``http``.
    It is case-insensitive, so ``WWW.SHOP.COM`` is caught as well as
    ``www.shop.com``.

    Args:
        tag: Tagline text, in any letter case.

    Returns:
        bool: True if any of the URL markers occurs in ``tag``.
    """
    url_markers = ('www', '.com', 'http')
    # Lower-case once here so callers are not required to normalise first.
    lowered = tag.lower()
    return any(marker in lowered for marker in url_markers)
|
|
|
|
|
|
def is_price_tagline(tag):
    """Return True when the tagline contains a price.

    A price is a currency marker (``₹``, ``rs``/``rs.``, ``$`` or ``र``)
    followed by digits. Taglines that mention a large denomination are never
    treated as prices. A large denomination is either one of the words
    crore / thousand / million / billion / trillion, or a currency amount
    suffixed with lac(s), lakh(s), cr or k.

    Matching is case-insensitive.

    Args:
        tag: Tagline text.

    Returns:
        bool: True if a price is present and no large-denomination amount
        is mentioned, otherwise False.
    """
    large_amount_words = ("crore", "thousand", "million", "billion", "trillion")

    # Digits with optional decimal/comma groups, so "1.5" and "1,50,000" are
    # consumed whole before the unit suffix is looked for.
    amount = r'\d+(?:[.,]\d+)*'
    # The trailing \b keeps "k"/"cr" from matching the start of ordinary
    # words such as "kg" or "credit".
    large_amount_pattern = (
        r'(?:₹|\brs|\$)\.?\s?' + amount + r'\s*(?:lacs?|lakhs?|cr|k)\b'
    )
    price_pattern = r'(₹\s?\d+)|(\brs\.?\s?\d+)|(\$\s?\d+)|(र\d+)'

    text = tag.lower()

    if any(word in text for word in large_amount_words):
        return False
    if re.search(large_amount_pattern, text):
        return False
    return bool(re.search(price_pattern, text))
|
|
|
|
|
|
def is_multiple_emoji(emoji_text):
    """Decide from a textual answer whether the tagline has too many emojis.

    The last whitespace-separated token of ``emoji_text`` is taken as the
    answer: ``'0'`` or ``'1'`` means "not excessive", anything else means
    "excessive". (``tagline`` passes the raw reply of
    ``model_handler.intern`` for the ``config.PEMO`` prompt; presumably the
    token is an emoji count -- confirm against the prompt.)

    Args:
        emoji_text: Answer text to interpret.

    Returns:
        bool: True if the final token is neither ``'0'`` nor ``'1'``;
        False for those tokens and for an empty or whitespace-only answer.
    """
    words = emoji_text.split()
    if not words:
        # An empty reply gives nothing to interpret; indexing [-1] here
        # would raise IndexError, so treat it as "not excessive".
        return False
    return words[-1] not in ('0', '1')
|
|
|
|
|
|
def is_incomplete_tagline(tag, is_eng):
    """Return True when the tagline looks cut off.

    Emojis are removed and surrounding whitespace is stripped before the
    ending is inspected. The tagline is considered incomplete when it ends
    with two or more dots, with the single-character ellipsis ``…``, or --
    for non-English taglines only -- with a single ``.``.

    Args:
        tag: Tagline text.
        is_eng: Whether the tagline was classified as English.

    Returns:
        bool: True if the tagline ends with one of the markers above.
    """
    text = emoji.replace_emoji(tag, '').strip()

    # '..' also covers '...' and longer runs; '…' is the Unicode ellipsis
    # that the ASCII-only check would miss.
    if text.endswith(('..', '…')):
        return True

    return (not is_eng) and text.endswith('.')
|
|
|
|
|
|
def tagline(image_path):
    """Run the tagline checks on an image and return a dict of 0/1 flags.

    The tagline region (``config.TAG``) is cropped with ``get_roi`` and read
    twice with EasyOCR: once as-is and once after ``blur_image``. If either
    reading is blank, only "Empty/Illegible/Black Tagline" is set and the
    function returns immediately. Otherwise the remaining checks run:
    incomplete ending, hyperlink, price, a second tagline region
    (``config.DTAG``) and the emoji prompt (``config.PEMO``).

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: Keys "Empty/Illegible/Black Tagline", "Multiple Taglines",
        "Incomplete Tagline", "Hyperlink", "Price Tag" and
        "Excessive Emojis", each mapped to 0 or 1.
    """
    results = dict.fromkeys(
        (
            "Empty/Illegible/Black Tagline",
            "Multiple Taglines",
            "Incomplete Tagline",
            "Hyperlink",
            "Price Tag",
            "Excessive Emojis",
        ),
        0,
    )

    tag_roi = get_roi(image_path, *config.TAG)
    blurred_roi = blur_image(tag_roi, 0.3)
    ocr_text = model_handler.easyocr_ocr(tag_roi).lower().strip()
    blurred_ocr_text = model_handler.easyocr_ocr(blurred_roi).lower().strip()

    # Nothing readable in either the sharp or the blurred crop: stop early.
    if is_blank(ocr_text) or is_blank(blurred_ocr_text):
        results["Empty/Illegible/Black Tagline"] = 1
        return results

    is_eng = is_english(ocr_text)
    if is_eng:
        # English taglines are re-read with the prompt-based model, on both
        # the sharp and the blurred crop, and the two readings are compared.
        # NOTE(review): the comparison is taken to apply to English text
        # only -- confirm against the original indentation.
        tag = model_handler.intern(tag_roi, config.PTAG, 25).strip().lower()
        blurred_tag = model_handler.intern(blurred_roi, config.PTAG, 25).lower().strip()
        if is_unreadable_tagline(blurred_tag, tag):
            results["Empty/Illegible/Black Tagline"] = 1
    else:
        # Non-English taglines keep the EasyOCR reading; the flag stays 0.
        tag = ocr_text

    results["Incomplete Tagline"] = int(bool(is_incomplete_tagline(tag, is_eng)))
    results["Hyperlink"] = int(bool(is_hyperlink_tagline(tag)))
    results["Price Tag"] = int(bool(is_price_tagline(tag)))

    # Any text found in the secondary region counts as an extra tagline.
    second_roi = get_roi(image_path, *config.DTAG)
    second_text = model_handler.easyocr_ocr(second_roi).strip()
    results["Multiple Taglines"] = int(not is_blank(second_text))

    emoji_answer = model_handler.intern(tag_roi, config.PEMO, 100)
    results["Excessive Emojis"] = int(bool(is_multiple_emoji(emoji_answer)))

    return results
|
|
|
|
|
|
def cta(image_path):
    """Check the call-to-action region of an image.

    The ``config.CTA`` region is cropped with ``get_roi`` and read with
    ``model_handler.intern`` using the ``config.PTAG`` prompt. The CTA is
    flagged as bad when the text:

    * contains a period (which also covers ``..`` and ``...``),
    * contains an emoji character,
    * is English and has at most 2 characters after ``clean_text``, or
    * has more than 15 characters after ``clean_text``.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: ``{"Bad CTA": 1}`` if any rule above fires, otherwise
        ``{"Bad CTA": 0}``.
    """
    image = get_roi(image_path, *config.CTA)
    cta_text = model_handler.intern(image, config.PTAG, 5).strip()

    # A single '.' test is sufficient: any text containing '..' or '...'
    # necessarily contains '.'.
    if '.' in cta_text:
        return {"Bad CTA": 1}

    if any(emoji.is_emoji(char) for char in cta_text):
        return {"Bad CTA": 1}

    cleaned = clean_text(cta_text)

    if len(cleaned) > 15:
        return {"Bad CTA": 1}

    # The language check is only needed for very short text, so run it last.
    if len(cleaned) <= 2 and is_english(cta_text):
        return {"Bad CTA": 1}

    return {"Bad CTA": 0}
|
|
|
|
|
|
def tnc(image_path):
    """Report whether the terms-and-conditions region contains any text.

    The ``config.TNC`` region is cropped with ``get_roi``, read with
    EasyOCR and normalised with ``clean_text``.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: ``{"Terms & Conditions": 0}`` when the cleaned text is blank
        according to ``is_blank``, otherwise ``{"Terms & Conditions": 1}``.
    """
    region = get_roi(image_path, *config.TNC)
    cleaned_text = clean_text(model_handler.easyocr_ocr(region))
    flag = 0 if is_blank(cleaned_text) else 1
    return {"Terms & Conditions": flag}
|
|
|
|
|
|
def tooMuchText(image_path, max_chars=55):
    """Flag images whose remaining body text is longer than ``max_chars``.

    Three fixed regions are passed to ``destroy_text_roi`` before the image
    is read with EasyOCR, so text inside them does not count towards the
    total. The region tuples are forwarded unchanged to ``destroy_text_roi``;
    presumably they are fractional (left, top, right, bottom) boxes --
    confirm against that helper.

    Args:
        image_path: Path of the image to analyse.
        max_chars: Maximum allowed length of the lower-cased, stripped OCR
            text. Defaults to 55, the previously hard-coded limit.

    Returns:
        dict: ``{"Too Much Text": 1}`` if the OCR text is longer than
        ``max_chars``, otherwise ``{"Too Much Text": 0}``.
    """
    masked_regions = (
        (0.04, 0.625, 1.0, 0.677),
        (0, 0, 1.0, 0.25),
        (0, 0.85, 1.0, 1),
    )

    # convert() returns an independent copy, so the file handle can be
    # released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    for region in masked_regions:
        image = destroy_text_roi(image, *region)

    body_text = model_handler.easyocr_ocr(image).lower().strip()
    return {"Too Much Text": 1 if len(body_text) > max_chars else 0}
|
|
|
|