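"""Image moderation checks.

Combines text-based checks on the OCR'd body copy (risky keywords,
promotion of illegal activities, competitor mentions) with CLIP-based
visual checks (inappropriate and religious content). Keyword lists,
labels, and model access come from ``backend.config`` and
``backend.model_handler``.
"""
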
import re
import torch
from PIL import Image
from backend import config
from backend.utils import find_similar_substring, destroy_text_roi
from backend.model_handler import model_handler

def is_risky(body_text):
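    """Return True if the text fuzzy-matches any keyword in RISKY_KEYWORDS.

    Strips everything except Latin alphanumerics, Devanagari digits
    (U+0966-U+096F), and whitespace before matching, so lightly obfuscated
    spellings are still caught by ``find_similar_substring``.
    """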
    body_text = re.sub(r'[^a-zA-Z0-9\u0966-\u096F\s]', '', body_text)
    for keyword in config.RISKY_KEYWORDS:
        if find_similar_substring(body_text, keyword):
            return True
    return False

def is_prom_illegal_activity(body_text):
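    """Return True if the text appears to promote an illegal activity.

    Flags any configured promotion phrase followed, with anything in
    between, by any configured illegal activity.
    """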
    for phrase in config.ILLEGAL_PHRASES:
        for activity in config.ILLEGAL_ACTIVITIES:
            pattern = rf"{re.escape(phrase)}.*?{re.escape(activity)}"
            if re.search(pattern, body_text):
                return True
    return False

def is_competitor(body_text):
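    """Return True if any configured competitor brand appears as a whole word."""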
    for brand in config.COMPETITOR_BRANDS:
        if re.search(r'\b' + re.escape(brand) + r'\b', body_text):
            return True
    return False

def body(image_path):
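    """Run the text-based moderation checks on an image.

    Extracts the on-image text with ``model_handler.intern``, normalises
    common OCR character confusions and whitespace, then returns a dict of
    0/1 flags for high-risk, illegal, and competitor content.
    """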
    results = {}
    image = Image.open(image_path).convert('RGB')
    bd = model_handler.intern(image, config.PTAG, 500).lower()

    # Normalise characters the OCR step commonly reads in place of letters
    # so the keyword checks below match more reliably.
    ocr_substitutions = {'0': 'o', '1': 'l', '!': 'l', '@': 'a', '5': 's', '8': 'b'}
    for char, substitute in ocr_substitutions.items():
        bd = bd.replace(char, substitute)
    bd = ' '.join(bd.split())  # collapse repeated whitespace

    results["High Risk Content"] = 1 if is_risky(bd) else 0
    results["Illegal Content"] = 1 if is_prom_illegal_activity(bd) else 0
    results["Competitor References"] = 1 if is_competitor(bd) else 0

    return results

def offensive(image):
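    """Return True if CLIP scores the image as inappropriate.

    The text region given by ``config.TAG`` is removed with
    ``destroy_text_roi`` first, so the decision reflects the visual
    content rather than any overlaid text.
    """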
    image = destroy_text_roi(image, *config.TAG)
    
    probs = model_handler.get_clip_probs(image, config.APPROPRIATE_LABELS)
    if probs is None:
        return False

    # APPROPRIATE_LABELS is assumed ordered [inappropriate, appropriate],
    # matching the indices used here.
    inappropriate_prob = probs[0][0].item()
    appropriate_prob = probs[0][1].item()
    return inappropriate_prob > appropriate_prob

def religious(image):
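    """Check the image for religious content with CLIP.

    The first entry of ``config.RELIGIOUS_LABELS`` is treated as the
    "no religious content" class; if any other label scores highest the
    function returns ``(True, label)``, otherwise ``(False, None)``.
    """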
    probs = model_handler.get_clip_probs(image, config.RELIGIOUS_LABELS)
    if probs is None:
        return False, None

    highest_score_index = torch.argmax(probs, dim=1).item()

    if highest_score_index != 0:
        return True, config.RELIGIOUS_LABELS[highest_score_index]
    return False, None

def theme(image_path):
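    """Run the visual moderation checks on an image.

    Returns a dict with a 0/1 flag for inappropriate content and a
    religious-content entry that embeds the detected label when present
    ("1 [<label>]"), otherwise "0".
    """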
    results = {}
    image = Image.open(image_path).convert('RGB')

    results["Inappropriate Content"] = 1 if offensive(image) else 0

    is_religious, religious_label = religious(image)
    results["Religious Content"] = f"1 [{religious_label}]" if is_religious else "0"

    return results
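
if __name__ == "__main__":
    # Illustrative sketch only: run both pipelines on one image and print
    # the combined flags. "sample_ad.jpg" is a placeholder path, not a file
    # that ships with this module.
    sample_path = "sample_ad.jpg"
    flags = {**body(sample_path), **theme(sample_path)}
    print(flags)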