# File size: 4,228 Bytes
# d790e98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import re
import emoji
from PIL import Image
from backend import config
from backend.utils import get_roi, clean_text, are_strings_similar, blur_image, is_blank, is_english, is_valid_english, destroy_text_roi
from backend.model_handler import model_handler

def is_unreadable_tagline(htag, tag):
    """Report whether two readings of the same tagline disagree.

    Both strings are normalised with ``clean_text`` and the normalised
    results are compared with ``are_strings_similar``.

    Args:
        htag: First reading of the tagline (in this module, the text read
            from the blurred tagline image).
        tag: Second reading of the tagline (the text read from the
            unblurred image).

    Returns:
        True when the cleaned strings are NOT judged similar, False otherwise.
    """
    normalized = [clean_text(text) for text in (htag, tag)]
    similar = are_strings_similar(*normalized)
    return not similar

def is_hyperlink_tagline(tag):
    """Return True when the tagline contains a URL-like fragment.

    The check is a plain, case-sensitive substring search for ``'www'``,
    ``'.com'`` or ``'http'``.

    Args:
        tag: Tagline text to inspect.

    Returns:
        True if any of the fragments occurs anywhere in ``tag``.
    """
    for marker in ('www', '.com', 'http'):
        if marker in tag:
            return True
    return False

def is_price_tagline(tag):
    """Return True when the tagline quotes a price.

    A price is a currency marker (``₹``, ``rs``/``rs.``, ``$`` or ``र``)
    followed by digits. Two kinds of text are excluded and always yield
    False, even if a price is also present:

    * any of the words crore/thousand/million/billion/trillion;
    * a currency amount followed by a unit from lac/lacs/lakh/lakhs/cr/k.

    Matching is case-sensitive.

    NOTE: the unit alternatives are not anchored at a word boundary, so a
    bare ``k`` also matches the first letter of a following word
    (e.g. "₹ 500 kids" is excluded).

    Args:
        tag: Tagline text to inspect.

    Returns:
        True if a price is found and no exclusion applies, else False.
    """
    large_amount_words = ("crore", "thousand", "million", "billion", "trillion")
    large_amount_regex = r'(₹\.?\s?\d+\s*(lac|lacs|lakh|lakhs|cr|k))|(\brs\.?\s?\d+\s*(lac|lacs|lakh|lakhs|cr|k))|(\$\.?\s?\d+\s*(lac|lacs|lakh|lakhs|cr|k))'
    price_regex = r'(₹\s?\d+)|(\brs\.?\s?\d+)|(\$\s?\d+)|(र\d+)'

    # Exclusions are checked first: words, then amount-with-unit.
    for word in large_amount_words:
        if word in tag:
            return False
    if re.search(large_amount_regex, tag) is not None:
        return False

    return re.search(price_regex, tag) is not None

def is_multiple_emoji(emoji_text):
    """Decide whether a model's emoji answer signals more than one emoji.

    The answer is split on whitespace and only its final token is
    inspected: ``'0'`` and ``'1'`` mean "not excessive"; any other final
    token means "excessive".

    An empty, whitespace-only or ``None`` answer has no final token to
    inspect. It is treated as "not excessive" rather than raising
    ``IndexError``, so a blank model response cannot abort the caller.

    Args:
        emoji_text: Raw text returned by the model for the emoji prompt.

    Returns:
        False when the answer is blank or ends in ``'0'``/``'1'``;
        True otherwise.
    """
    tokens = (emoji_text or "").split()
    if not tokens:
        # Nothing to interpret: do not flag the image on missing evidence.
        return False
    return tokens[-1] not in ('0', '1')

def is_incomplete_tagline(tag, is_eng):
    """Return True when the tagline looks cut off.

    Emojis are removed and surrounding whitespace is stripped before the
    ending is examined. The tagline is incomplete when it ends with two or
    more dots, or, for non-English text only, when it ends with a single
    dot.

    Args:
        tag: Tagline text to inspect.
        is_eng: Truthy when the tagline was classified as English.

    Returns:
        True if the tagline is considered incomplete, else False.
    """
    stripped = emoji.replace_emoji(tag, '').strip()
    trailing_ellipsis = stripped.endswith(('...', '..'))
    trailing_period = stripped.endswith('.')
    return trailing_ellipsis or (not is_eng and trailing_period)

def tagline(image_path):
    """Run all tagline checks on an image and return a flag per check.

    Steps, in order:

    1. Crop the ``config.TAG`` region and build a blurred copy of it.
    2. OCR both with EasyOCR; if either reading is blank, flag the tagline
       as empty/illegible and return immediately.
    3. For English text, re-read both crops with ``model_handler.intern``
       and flag the tagline as illegible when the two readings disagree.
       For non-English text the EasyOCR reading is used as the tagline.
    4. Check the tagline for an incomplete ending, a hyperlink and a price.
    5. OCR the ``config.DTAG`` region; any non-blank text there counts as
       a second tagline.
    6. Ask the model the ``config.PEMO`` prompt and flag excessive emojis
       based on its answer.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        A dict mapping each check name to 0 (pass) or 1 (flagged).
    """
    unreadable_key = "Empty/Illegible/Black Tagline"
    report = dict.fromkeys(
        (
            unreadable_key,
            "Multiple Taglines",
            "Incomplete Tagline",
            "Hyperlink",
            "Price Tag",
            "Excessive Emojis",
        ),
        0,
    )

    tag_region = get_roi(image_path, *config.TAG)
    blurred_region = blur_image(tag_region, 0.3)
    ocr_text = model_handler.easyocr_ocr(tag_region).lower().strip()
    blurred_ocr_text = model_handler.easyocr_ocr(blurred_region).lower().strip()

    # No usable text in either reading: nothing else can be checked.
    if is_blank(ocr_text) or is_blank(blurred_ocr_text):
        report[unreadable_key] = 1
        return report

    is_eng = is_english(ocr_text)
    if is_eng:
        tag = model_handler.intern(tag_region, config.PTAG, 25).strip().lower()
        blurred_tag = model_handler.intern(blurred_region, config.PTAG, 25).lower().strip()
        if is_unreadable_tagline(blurred_tag, tag):
            report[unreadable_key] = 1
    else:
        # Non-English taglines keep the EasyOCR reading as-is.
        tag = ocr_text

    report["Incomplete Tagline"] = int(bool(is_incomplete_tagline(tag, is_eng)))
    report["Hyperlink"] = int(bool(is_hyperlink_tagline(tag)))
    report["Price Tag"] = int(bool(is_price_tagline(tag)))

    second_region = get_roi(image_path, *config.DTAG)
    second_text = model_handler.easyocr_ocr(second_region).strip()
    report["Multiple Taglines"] = int(not is_blank(second_text))

    emoji_answer = model_handler.intern(tag_region, config.PEMO, 100)
    report["Excessive Emojis"] = int(bool(is_multiple_emoji(emoji_answer)))

    return report

def cta(image_path):
    """Check the call-to-action text of an image.

    The ``config.CTA`` region is cropped and read with
    ``model_handler.intern``. The CTA is reported as bad when any of the
    following holds:

    * the text contains a period;
    * the text contains an emoji character;
    * the text is English and its cleaned form has at most 2 characters;
    * the cleaned form has more than 15 characters.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        ``{"Bad CTA": 1}`` if the CTA is bad, otherwise ``{"Bad CTA": 0}``.
    """
    image = get_roi(image_path, *config.CTA)
    cta_text = model_handler.intern(image, config.PTAG, 5).strip()

    # One '.' test is enough: any text containing '..' or '...' contains '.'.
    if '.' in cta_text:
        return {"Bad CTA": 1}

    if any(emoji.is_emoji(char) for char in cta_text):
        return {"Bad CTA": 1}

    cleaned_length = len(clean_text(cta_text))

    # The language check only matters for very short text, so run it lazily.
    if cleaned_length <= 2 and is_english(cta_text):
        return {"Bad CTA": 1}

    if cleaned_length > 15:
        return {"Bad CTA": 1}

    return {"Bad CTA": 0}

def tnc(image_path):
    """Detect text in the terms-and-conditions region of an image.

    The ``config.TNC`` region is cropped, read with EasyOCR and passed
    through ``clean_text``; the flag is set when the cleaned text is not
    blank.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        ``{"Terms & Conditions": 1}`` when text is present, otherwise
        ``{"Terms & Conditions": 0}``.
    """
    region = get_roi(image_path, *config.TNC)
    cleaned = clean_text(model_handler.easyocr_ocr(region))
    if is_blank(cleaned):
        return {"Terms & Conditions": 0}
    return {"Terms & Conditions": 1}

def tooMuchText(image_path):
    """Flag an image whose remaining text exceeds 55 characters.

    The image is loaded as RGB, three fixed regions are passed through
    ``destroy_text_roi`` (presumably erasing any text inside them - confirm
    against ``backend.utils``), and the result is read with EasyOCR. The
    flag is set when the lower-cased, stripped OCR text is longer than 55
    characters.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        ``{"Too Much Text": 1}`` when more than 55 characters are read,
        otherwise ``{"Too Much Text": 0}``.
    """
    # Regions handed to destroy_text_roi before OCR. The values look like
    # fractional (x0, y0, x1, y1) boxes - TODO confirm against the helper.
    DRIB = (0.04, 0.625, 1.0, 0.677)
    DUP = (0, 0, 1.0, 0.25)
    DBEL = (0, 0.85, 1.0, 1)

    # convert() returns an independent in-memory image, so the source file
    # handle can be closed right away instead of lingering until GC.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    for region in (DRIB, DUP, DBEL):
        image = destroy_text_roi(image, *region)

    bd = model_handler.easyocr_ocr(image).lower().strip()
    return {"Too Much Text": 1 if len(bd) > 55 else 0}