import re
import torch
from PIL import Image
from backend import config
from backend.utils import find_similar_substring, destroy_text_roi
from backend.model_handler import model_handler


def is_risky(body_text):
    """Return True if the text approximately matches any configured risky keyword."""
    # Keep only ASCII alphanumerics, Devanagari digits (U+0966-U+096F) and
    # whitespace before matching.
    body_text = re.sub(r'[^a-zA-Z0-9\u0966-\u096F\s]', '', body_text)
    for keyword in config.RISKY_KEYWORDS:
        if find_similar_substring(body_text, keyword):
            return True
    return False
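
# Quick illustration of the normalisation above (input is hypothetical):
#   re.sub(r'[^a-zA-Z0-9\u0966-\u096F\s]', '', 'win ca-sh now!!')
#   -> 'win cash now'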


def is_prom_illegal_activity(body_text):
    """Return True if a configured phrase is followed by a configured illegal activity."""
    for phrase in config.ILLEGAL_PHRASES:
        for activity in config.ILLEGAL_ACTIVITIES:
            # Phrase followed (non-greedily) by activity, anywhere in the text.
            pattern = rf"{re.escape(phrase)}.*?{re.escape(activity)}"
            if re.search(pattern, body_text):
                return True
    return False
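
# Illustration with hypothetical config values: if ILLEGAL_PHRASES contained
# "how to" and ILLEGAL_ACTIVITIES contained "forge documents", the pattern is
# effectively r"how to.*?forge documents", which matches
# "how to quickly forge documents" but not text where the activity comes
# first. The search is unanchored; body() lowercases the text beforehand.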


def is_competitor(body_text):
    """Return True if any configured competitor brand appears as a whole word."""
    for brand in config.COMPETITOR_BRANDS:
        # Word boundaries avoid partial hits inside longer tokens.
        if re.search(r'\b' + re.escape(brand) + r'\b', body_text):
            return True
    return False
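
# With a hypothetical brand "acme", the pattern r'\bacme\b' matches
# "buy acme today" but not "acmeify", so fragments of longer tokens
# are not flagged.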


def body(image_path):
    """Run the text-based checks on the body text read from an image."""
    results = {}
    image = Image.open(image_path).convert('RGB')
    # Extract the image's body text via the model handler and lowercase it.
    bd = model_handler.intern(image, config.PTAG, 500).lower()
    # Undo common OCR / leetspeak character swaps before keyword matching.
    ocr_substitutions = {'0': 'o', '1': 'l', '!': 'l', '@': 'a', '5': 's', '8': 'b'}
    for char, substitute in ocr_substitutions.items():
        bd = bd.replace(char, substitute)
    # Collapse runs of whitespace to single spaces.
    bd = ' '.join(bd.split())
    results["High Risk Content"] = 1 if is_risky(bd) else 0
    results["Illegal Content"] = 1 if is_prom_illegal_activity(bd) else 0
    results["Competitor References"] = 1 if is_competitor(bd) else 0
    return results
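
# Worked example of the normalisation pipeline above (hypothetical OCR text):
#   "FREE   CA5H  0FFER!" --lower()--> "free   ca5h  0ffer!"
#   --substitutions--> "free   cash  offerl"   (note '!' maps to 'l')
#   --whitespace collapse--> "free cash offerl"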


def offensive(image):
    """Return True if CLIP rates the image as inappropriate."""
    # Blank out the text region (ROI from config.TAG) so wording does not
    # sway the purely visual check.
    image = destroy_text_roi(image, *config.TAG)
    probs = model_handler.get_clip_probs(image, config.APPROPRIATE_LABELS)
    if probs is None:
        return False
    # Label order assumed here: index 0 = inappropriate, index 1 = appropriate.
    inappropriate_prob = probs[0][0].item()
    appropriate_prob = probs[0][1].item()
    return inappropriate_prob > appropriate_prob


def religious(image):
    """Return (flag, label): whether religious content is detected, and which label."""
    probs = model_handler.get_clip_probs(image, config.RELIGIOUS_LABELS)
    if probs is None:
        return False, None
    highest_score_index = torch.argmax(probs, dim=1).item()
    # Index 0 is assumed to be the "no religious content" label.
    if highest_score_index != 0:
        return True, config.RELIGIOUS_LABELS[highest_score_index]
    return False, None
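
# Illustration with hypothetical labels: if config.RELIGIOUS_LABELS were
# ["no religious content", "temple imagery", "church imagery"] and index 2
# scored highest, religious() would return (True, "church imagery").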


def theme(image_path):
    """Run the image-based (visual) checks."""
    results = {}
    image = Image.open(image_path).convert('RGB')
    results["Inappropriate Content"] = 1 if offensive(image) else 0
    is_religious, religious_label = religious(image)
    # Religious hits carry the matched label in the reported value.
    results["Religious Content"] = f"1 [{religious_label}]" if is_religious else "0"
    return results
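

# Minimal usage sketch; the image path is a placeholder and this assumes the
# backend package and its models are available in the running environment:
if __name__ == "__main__":
    path = "sample_ad.jpg"  # hypothetical input image
    report = {**body(path), **theme(path)}
    for check, flag in report.items():
        print(f"{check}: {flag}")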