# csam_patterns / app.py — Hugging Face Space (author: LPX55, commit 840b896)
import re
from typing import List, Tuple
import gradio as gr
class ContentSafetyChecker:
    """Multi-layer heuristic filter for CSAM-related text prompts.

    Each ``check_*`` method is an independent detection layer; all layers
    are combined by :meth:`is_content_suspicious`.  The heuristics
    deliberately favour recall over precision, so benign prompts may be
    flagged (false positives are accepted by design in this domain).
    """

    def __init__(self):
        # Pre-compile regex patterns once so repeated checks stay cheap.
        self.csam_patterns = self._compile_csam_patterns()
        self.suspicious_combinations = self._compile_suspicious_combinations()

    def _compile_csam_patterns(self) -> List[re.Pattern]:
        """Compile the CSAM detection regexes (all case-insensitive)."""
        patterns = [
            # Direct CSAM references
            r'\b(csam|child.*sexual|minor.*abuse|illegal.*content)',
            # Age term + sexual content, in either order
            r'\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen).*\b(sex|nude|naked|pussy|penis|vagina)',
            r'\b(sex|nude|naked|porn|erotic|seductive|lust|breast|genital|penis|vagina).*\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen)',
            # Numerical age (0-17) with sexual content
            r'\b(?:age|aged?|years? old)\s*[=\-:]\s*(?:[0-9]|1[0-7])\b.*\b(sex|nude|naked|porn|erotic)',
            r'\b([0-9]|1[0-7])\s*(?:years? old|yo|y/o)\b.*\b(sex|nude|naked|porn|erotic)',
            # Numbers 3-17 directly followed by sexual content terms
            r'\b([3-9]|1[0-7])\s*[,\-]?\s*(sex|nude|naked|porn|erotic|seductive|explicit)',
            # Descriptive age limits ("under 16", "below 12", ...)
            r'\b(under\s*(1[0-8]|[0-9])|below\s*(1[0-8]|[0-9])|less than\s*(1[0-8]|[0-9]))\s*years?\s*[,\-]?\s*(sex|nude|naked|porn|erotic)',
            # Known abusive scenarios / terminology
            r'\b(pedophil|lolita|underage.*sex|minor.*sex|illegal.*sex)',
            # Grooming language
            r'\b(inappropriate.*adult.*child|adult.*minor.*relationship|groom)',
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def _compile_suspicious_combinations(self) -> List[Tuple[List[str], List[str], int]]:
        """Return (age_terms, sexual_terms, max_token_distance) triples.

        Entries may be multi-word phrases (e.g. 'young teen');
        :meth:`check_word_proximity` matches those as consecutive tokens.
        """
        return [
            # Age indicators + sexual content
            (['4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14',
              '15', '16', '17',
              'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
              'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
              'sixteen', 'seventeen',
              'young teen', 'minor', 'child', 'kid', 'baby'],
             ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive',
              'penis', 'vagina', 'penetration', 'cock', 'pussy'],
             8),
            (['adult'],
             ['pedo', 'loli', 'secret', 'rape'],
             5),
        ]

    def check_pattern_matching(self, prompt: str) -> bool:
        """First line of defense: any pre-compiled regex hit flags."""
        for pattern in self.csam_patterns:
            if pattern.search(prompt):
                print(f"🚨 Pattern matched: {pattern.pattern}")
                return True
        return False

    def check_numerical_age_context(self, prompt: str) -> bool:
        """Check for numerical ages in suspicious contexts.

        Flags (a) any bare number in the minor range (3-17) when a sexual
        term occurs anywhere in the prompt, and (b) age-limit phrases such
        as "under 16" followed later in the prompt by a sexual term.
        """
        prompt_lower = prompt.lower()
        # Suspicious sexual content terms
        sexual_terms = ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit', 'undress', 'penis', 'vagina', 'pussy', 'cock']

        # (a) bare numbers in the minor range; int() cannot fail on a \d+
        # match, so no try/except is needed here.
        for num_str in re.findall(r'\b\d+\b', prompt_lower):
            age = int(num_str)
            if 3 <= age <= 17 and any(term in prompt_lower for term in sexual_terms):
                print(f"🚨 Suspicious age with sexual content: {age}")
                return True
        # NOTE: the original additionally re-tested "N year old" phrases
        # here, but its condition was identical to the check above and
        # could therefore never fire — dead code, removed.

        # (b) age-limit phrases followed (anywhere later) by a sexual term.
        age_limit_phrases = [
            r'\bunder\s*(1[0-8]|[0-9])\b',
            r'\bbelow\s*(1[0-8]|[0-9])\b',
            r'\bless than\s*(1[0-8]|[0-9])\b',
            r'\b[0-9]{1,2}\s*and under\b'
        ]
        for phrase_pattern in age_limit_phrases:
            match = re.search(phrase_pattern, prompt_lower)  # single search (was run twice)
            if match:
                remaining_text = prompt_lower[match.end():]
                if any(term in remaining_text for term in sexual_terms):
                    print(f"🚨 Age limit phrase with sexual content: {match.group()}")
                    return True
        return False

    def check_age_sexual_combinations(self, prompt: str) -> bool:
        """Age term + explicit number, with sexual content within ±50 chars."""
        prompt_lower = prompt.lower()
        age_sexual_patterns = [
            r'(child|kid|baby|toddler|infant|minor|youngster|teen|youth)\s*(?:of|is|was)?\s*(\d{1,2})\s*(years?\s*old)?',
            r'(\d{1,2})\s*(years?\s*old|yo|y/o)?\s*(child|kid|baby|toddler|infant|minor|young)',
        ]
        sexual_content_patterns = ['nude', 'naked', 'sex', 'porn', 'erotic', 'explicit', 'sexual', 'penis', 'vagina', 'pussy', 'cock']
        for age_pattern in age_sexual_patterns:
            for match in re.finditer(age_pattern, prompt_lower):
                # The age number sits in a different capture group for each
                # pattern; take the first all-digit group.
                age = next((int(g) for g in match.groups() if g and g.isdigit()), None)
                if age is not None and 3 <= age <= 17:  # minor age range
                    # Look for sexual content within 50 chars either side.
                    start_pos = max(0, match.start() - 50)
                    end_pos = min(len(prompt_lower), match.end() + 50)
                    context = prompt_lower[start_pos:end_pos]
                    if any(term in context for term in sexual_content_patterns):
                        print(f"🚨 Age-sexual content combination detected: age {age}")
                        return True
        return False

    def check_word_proximity(self, prompt: str) -> bool:
        """Second line: age terms near sexual terms (token distance).

        Fixes two defects of the original implementation:
        * multi-word entries such as 'young teen' are matched as runs of
          consecutive tokens (previously they could never match an entry
          of a whitespace-split token list);
        * every occurrence of each term is considered, not only the first
          occurrence returned by list.index().
        """
        tokens = prompt.lower().split()

        def _positions(term: str) -> List[int]:
            # Start indices where the (possibly multi-word) term occurs.
            parts = term.split()
            span = len(parts)
            return [i for i in range(len(tokens) - span + 1)
                    if tokens[i:i + span] == parts]

        for age_words, sexual_words, distance_threshold in self.suspicious_combinations:
            for age_word in age_words:
                age_positions = _positions(age_word)
                if not age_positions:
                    continue
                for sexual_word in sexual_words:
                    for s_idx in _positions(sexual_word):
                        if any(abs(a_idx - s_idx) <= distance_threshold
                               for a_idx in age_positions):
                            return True
        return False

    def simple_age_check(self, prompt: str) -> bool:
        """Fallback check: any number 3-17 plus any sexual term anywhere."""
        prompt_lower = prompt.lower()  # hoisted; was recomputed per number
        sexual_terms = ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit', 'penis', 'vagina', 'pussy', 'cock']
        if not any(term in prompt_lower for term in sexual_terms):
            return False
        return any(3 <= int(num) <= 17 for num in re.findall(r'\b\d+\b', prompt))

    def check_suspicious_phrases(self, prompt: str) -> bool:
        """Third line: exact suspicious phrase substrings."""
        suspicious_phrases = [
            'young model', 'teen photoshoot', 'minor content',
            'child art', 'kids only', 'no adults allowed',
            'private session', 'secret photos', 'hidden content'
        ]
        prompt_lower = prompt.lower()
        return any(phrase in prompt_lower for phrase in suspicious_phrases)

    def check_evasion_patterns(self, prompt: str) -> bool:
        """Fourth line: crude obfuscation probes, gated on age terms.

        NOTE(review): these regexes are extremely broad (the 'n..d' probe
        matches ordinary words like "and"); they only fire when an
        age-related word is also present, but false positives remain
        likely — tune with care.
        """
        evasion_indicators = [
            r'p\w*is',
            r'p\w*sy',
            r's\w*x',
            r'n\w*d\w*',
        ]
        age_indicators = ['old', 'teen', 'baby', 'kid', 'child', 'minor', 'young']
        for pattern in evasion_indicators:
            if re.search(pattern, prompt, re.IGNORECASE):
                # Only flag when combined with an age-related term.
                if any(indicator in prompt.lower() for indicator in age_indicators):
                    return True
        return False

    def check_numerical_age_indicators(self, prompt: str) -> bool:
        """Fifth line: minor-range numbers combined with sexual indicators.

        NOTE(review): 'off' is matched as a bare substring and occurs in
        many benign words ("office") — probably over-broad; confirm intent.
        """
        prompt_lower = prompt.lower()  # hoisted out of the per-number loop
        sexual_indicators = ['nude', 'naked', 'sex', 'porn', 'erotic', 'nudify', 'off', 'penis', 'cock', 'pussy', 'vagina', 'insert']
        if not any(indicator in prompt_lower for indicator in sexual_indicators):
            return False
        # Flag ages typically associated with minors.
        return any(3 <= int(num_str) <= 17 for num_str in re.findall(r'\b\d+\b', prompt))

    def check_contextual_red_flags(self, prompt: str) -> bool:
        """Sixth line: suspicious context pattern plus a trigger word."""
        red_flag_contexts = [
            # Suspicious session types
            (r'(private|secret|hidden|exclusive).*photo', ['nude', 'naked', 'photos']),
            # Suspicious content descriptions
            (r'(special|unique|exclusive|view|angle|pov|shot).*content', ['young', 'minor', 'teen'])
        ]
        prompt_lower = prompt.lower()
        for context_pattern, suspicious_words in red_flag_contexts:
            if re.search(context_pattern, prompt_lower):
                if any(word in prompt_lower for word in suspicious_words):
                    return True
        return False

    def is_content_suspicious(self, prompt: str) -> Tuple[bool, str]:
        """Run every layer in order; return (flagged, human-readable reason).

        A layer that raises is logged and skipped so that one buggy check
        cannot disable the whole filter.
        """
        checks = [
            (self.check_pattern_matching, "Pattern matching detected suspicious content"),
            (self.check_word_proximity, "Suspicious word proximity detected"),
            (self.check_suspicious_phrases, "Suspicious phrases detected"),
            (self.check_evasion_patterns, "Potential evasion patterns detected"),
            (self.check_numerical_age_indicators, "Suspicious age indicators with sexual content"),
            (self.check_contextual_red_flags, "Contextual red flags detected"),
            (self.simple_age_check, "Simple age check detected suspicious content"),
            (self.check_numerical_age_context, "Numerical age context check detected suspicious content"),
            (self.check_age_sexual_combinations, "Age-sexual combination check detected suspicious content")
        ]
        for check_func, message in checks:
            try:
                if check_func(prompt):
                    return True, message
            except Exception as e:
                print(f"Warning: Safety check {check_func.__name__} failed: {e}")
                continue
        return False, "Content appears safe"
# Enhanced safety function
def comprehensive_safety_check(prompt: str) -> Tuple[bool, str]:
    """Multi-layer safety checking with fallback mechanisms.

    Returns ``(blocked, reason)``.  Any internal failure fails closed:
    the prompt is blocked rather than allowed through.
    """
    try:
        # A fresh checker per call keeps every evaluation stateless.
        checker = ContentSafetyChecker()
        flagged, why = checker.is_content_suspicious(prompt)
        if flagged:
            return True, why

        # Cheap structural heuristics, run only when the main pass is clean.
        fallback_checks = (
            lambda p: len(p) > 1000,       # unusually long prompt (potential obfuscation)
            lambda p: p.count('"') > 20,   # excessive quoting (potential code injection)
            lambda p: '||' in p or '&&' in p,  # shell command operators
            lambda p: any(marker in p for marker in ('<script', 'javascript:', 'onload=')),  # basic XSS markers
        )
        for index, probe in enumerate(fallback_checks, start=1):
            try:
                hit = probe(prompt)
            except Exception:
                continue
            if hit:
                return True, f"Fallback safety check #{index} triggered"
        return False, "All safety checks passed"
    except Exception as e:
        print(f"Safety check failed, erring on caution: {e}")
        return True, "Safety system error - content blocked for caution"
# Usage in your application
def test_prompt(original_prompt: str) -> str:
    """Run the safety pipeline on *original_prompt*; return the prompt to use.

    Blocked prompts are appended to ``safety_blocks.log`` (best effort)
    and replaced by a neutral fallback prompt.
    """
    is_blocked, reason = comprehensive_safety_check(original_prompt)
    if is_blocked:
        print(f"🚨 SAFETY BLOCK: {reason} - Prompt: {original_prompt[:100]}...")
        # Log the blocked content (for monitoring); never let logging
        # failures break the request.
        try:
            # encoding pinned: prompts may contain non-ASCII, which raised
            # UnicodeEncodeError on platforms with a non-UTF-8 default.
            with open("safety_blocks.log", "a", encoding="utf-8") as f:
                f.write(f"{reason}: {original_prompt}\n")
        except Exception:
            print("Warning: Could not write to safety log")
        return "A professional and appropriate image editing task"  # Safe fallback
    # If safe, proceed with normal enhancement
    return f"Regex safety check passed: {original_prompt}"
# ---------------------------------------------------------------------------
# Gradio demo UI for exercising the safety checker interactively.
# ---------------------------------------------------------------------------
import gradio as gr


def process_prompt(prompt):
    """Run the full safety pipeline on *prompt* and format the verdict."""
    if not prompt or not prompt.strip():
        return "Please enter a prompt to test."
    is_blocked, reason = comprehensive_safety_check(prompt)
    if is_blocked:
        return f"🚨 BLOCKED: {reason}\n\nOriginal prompt: {prompt}"
    return f"✅ SAFE: {reason}\n\nOriginal prompt: {prompt}"


def clear_inputs():
    """Reset both the input box and the result box."""
    return "", ""


with gr.Blocks(title="Content Safety Checker | Testing Zone") as demo:
    gr.Markdown("""
# 🛡️ Regex-based Content Safety Checker
Some of you guys need to seek help...
""")
    with gr.Row():
        with gr.Column(scale=1):
            input_prompt = gr.Textbox(
                label="Test Prompt",
                placeholder="Type a prompt to check for safety...",
                lines=5,
                max_lines=10,
            )
            check_button = gr.Button("Check Safety", variant="primary")
            clear_button = gr.Button("Clear", variant="secondary")
        with gr.Column(scale=2):
            output_result = gr.Textbox(
                label="Safety Check Result",
                interactive=False,
                lines=10,
                max_lines=15,
            )
            safety_info = gr.HTML(
                value="<div style='padding:15px; margin-top:15px; background: #f8f9fa; border-radius: 8px;'>"
                "<h4>ℹ️ How it works:</h4>"
                "<ul>"
                "<li>Multi-layer safety checking system</li>"
                "<li>Detects age-related terms combined with sexual content</li>"
                "<li>Identifies potential CSAM/illegal content patterns</li>"
                "<li>Checks for evasion techniques and suspicious combinations</li>"
                "</ul>"
                "</div>"
            )

    # Clicking the button and pressing Enter both trigger the same check.
    for trigger in (check_button.click, input_prompt.submit):
        trigger(fn=process_prompt, inputs=input_prompt, outputs=output_result)

    clear_button.click(
        fn=clear_inputs,
        inputs=None,
        outputs=[input_prompt, output_result],
    )

demo.launch()