# HuggingFace Spaces demo — regex-based content safety checker
| import re | |
| from typing import List, Tuple | |
| import gradio as gr | |
class ContentSafetyChecker:
    """Layered heuristic filter for prompts that sexualize minors.

    Each ``check_*`` method is an independent line of defense combining
    keyword lists, compiled regexes and numerical-age extraction.  The
    aggregator :meth:`is_content_suspicious` runs them in order and
    short-circuits on the first hit.  The checks are deliberately broad:
    a false positive is preferred over a miss.
    """

    def __init__(self):
        # Pre-compile regex patterns once for performance.
        self.csam_patterns = self._compile_csam_patterns()
        self.suspicious_combinations = self._compile_suspicious_combinations()

    def _compile_csam_patterns(self) -> List[re.Pattern]:
        """Compile CSAM detection patterns (all case-insensitive)."""
        patterns = [
            # Direct CSAM references
            r'\b(csam|child.*sexual|minor.*abuse|illegal.*content)',
            # Age + sexual content combinations (either term order)
            r'\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen).*\b(sex|nude|naked|pussy|penis|vagina)',
            r'\b(sex|nude|naked|porn|erotic|seductive|lust|breast|genital|penis|vagina).*\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen)',
            # Numerical age checks with sexual content
            r'\b(?:age|aged?|years? old)\s*[=\-:]\s*(?:[0-9]|1[0-7])\b.*\b(sex|nude|naked|porn|erotic)',
            r'\b([0-9]|1[0-7])\s*(?:years? old|yo|y/o)\b.*\b(sex|nude|naked|porn|erotic)',
            # Numbers 3-17 directly followed by sexual content terms
            r'\b([3-9]|1[0-7])\s*[,\-]?\s*(sex|nude|naked|porn|erotic|seductive|explicit)',
            # Descriptive numerical age limits ("under 16", "below 12", ...)
            r'\b(under\s*(1[0-8]|[0-9])|below\s*(1[0-8]|[0-9])|less than\s*(1[0-8]|[0-9]))\s*years?\s*[,\-]?\s*(sex|nude|naked|porn|erotic)',
            # Inappropriate scenarios with numerical ages
            r'\b(pedophil|lolita|underage.*sex|minor.*sex|illegal.*sex)',
            # Grooming language
            r'\b(inappropriate.*adult.*child|adult.*minor.*relationship|groom)',
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def _compile_suspicious_combinations(self) -> List[Tuple[List[str], List[str], int]]:
        """Suspicious (age terms, sexual terms, max word distance) triples.

        Age terms may be multi-word phrases (e.g. "young teen"); the
        proximity check handles that.
        """
        return [
            (['4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'young teen', 'minor', 'child', 'kid', 'baby'],
             ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'penis', 'vagina', 'penetration', 'cock', 'pussy'], 8),
            (['adult'],
             ['pedo', 'loli', 'secret', 'rape'], 5),
        ]

    def check_pattern_matching(self, prompt: str) -> bool:
        """First line of defense: pre-compiled regex pattern matching."""
        for pattern in self.csam_patterns:
            if pattern.search(prompt):
                print(f"🚨 Pattern matched: {pattern.pattern}")
                return True
        return False

    def check_numerical_age_context(self, prompt: str) -> bool:
        """Check for numerical ages (3-17) in sexualized contexts.

        Review fixes: the original ran each age-limit regex twice and
        contained a dead "N year old" branch whose condition was already
        fully covered by the plain number check.
        """
        prompt_lower = prompt.lower()
        # Suspicious sexual content terms (substring match, intentionally broad).
        sexual_terms = ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit', 'undress', 'penis', 'vagina', 'pussy', 'cock']
        # Hoisted: the term scan is invariant across all extracted numbers.
        has_sexual_term = any(term in prompt_lower for term in sexual_terms)

        # Any bare number in the minor range combined with sexual content.
        if has_sexual_term:
            for num_str in re.findall(r'\b\d+\b', prompt_lower):
                age = int(num_str)  # \d+ always parses; no try/except needed
                if 3 <= age <= 17:
                    print(f"🚨 Suspicious age with sexual content: {age}")
                    return True

        # Phrases like "under 18", "below 16", "less than 15", "12 and under".
        age_limit_phrases = [
            r'\bunder\s*(1[0-8]|[0-9])\b',
            r'\bbelow\s*(1[0-8]|[0-9])\b',
            r'\bless than\s*(1[0-8]|[0-9])\b',
            r'\b[0-9]{1,2}\s*and under\b',
        ]
        for phrase_pattern in age_limit_phrases:
            match = re.search(phrase_pattern, prompt_lower)  # search once (was duplicated)
            if match is None:
                continue
            # Only flag when sexual content appears after the age-limit phrase.
            if any(term in prompt_lower[match.end():] for term in sexual_terms):
                print(f"🚨 Age limit phrase with sexual content: {match.group()}")
                return True
        return False

    def check_age_sexual_combinations(self, prompt: str) -> bool:
        """Flag age nouns carrying an explicit numeric age (3-17) when
        sexual terms appear within ~50 characters of the age mention."""
        prompt_lower = prompt.lower()
        age_sexual_patterns = [
            r'(child|kid|baby|toddler|infant|minor|youngster|teen|youth)\s*(?:of|is|was)?\s*(\d{1,2})\s*(years?\s*old)?',
            r'(\d{1,2})\s*(years?\s*old|yo|y/o)?\s*(child|kid|baby|toddler|infant|minor|young)',
        ]
        sexual_content_patterns = ['nude', 'naked', 'sex', 'porn', 'erotic', 'explicit', 'sexual', 'penis', 'vagina', 'pussy', 'cock']
        for age_pattern in age_sexual_patterns:
            for match in re.finditer(age_pattern, prompt_lower):
                # The numeric group position differs between the two
                # patterns, so take the first all-digit capture group.
                age = next((int(g) for g in match.groups() if g and g.isdigit()), None)
                if age is None or not 3 <= age <= 17:  # only the minor age range
                    continue
                # Look for sexual content within 50 characters either side.
                start_pos = max(0, match.start() - 50)
                end_pos = min(len(prompt_lower), match.end() + 50)
                context = prompt_lower[start_pos:end_pos]
                if any(term in context for term in sexual_content_patterns):
                    print(f"🚨 Age-sexual content combination detected: age {age}")
                    return True
        return False

    def check_word_proximity(self, prompt: str) -> bool:
        """Second line: flag age terms appearing near sexual terms.

        Review fixes: the original used ``list.index`` (first occurrence
        only, so a later close pairing was missed) and could never match
        multi-word age phrases such as "young teen" against single
        whitespace-split tokens.
        """
        words = prompt.lower().split()

        def occurrences(term: str) -> List[int]:
            # Word index of every occurrence of `term`, which may span
            # several consecutive tokens.
            parts = term.split()
            n = len(parts)
            return [i for i in range(len(words) - n + 1) if words[i:i + n] == parts]

        for age_words, sexual_words, distance_threshold in self.suspicious_combinations:
            age_positions = [pos for term in age_words for pos in occurrences(term)]
            if not age_positions:
                continue
            for sexual_word in sexual_words:
                for s_pos in occurrences(sexual_word):
                    if any(abs(a_pos - s_pos) <= distance_threshold for a_pos in age_positions):
                        return True
        return False

    def simple_age_check(self, prompt: str) -> bool:
        """Simple numerical age check: any number 3-17 plus any sexual term."""
        prompt_lower = prompt.lower()  # hoisted: was recomputed per number
        sexual_terms = ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit', 'penis', 'vagina', 'pussy', 'cock']
        if not any(term in prompt_lower for term in sexual_terms):
            return False
        return any(3 <= int(num) <= 17 for num in re.findall(r'\b\d+\b', prompt))

    def check_suspicious_phrases(self, prompt: str) -> bool:
        """Third line: check for specific suspicious phrases (substring match)."""
        suspicious_phrases = [
            'young model', 'teen photoshoot', 'minor content',
            'child art', 'kids only', 'no adults allowed',
            'private session', 'secret photos', 'hidden content'
        ]
        prompt_lower = prompt.lower()
        for phrase in suspicious_phrases:
            if phrase in prompt_lower:
                return True
        return False

    def check_evasion_patterns(self, prompt: str) -> bool:
        """Fourth line: catch character-substitution obfuscation.

        The patterns are intentionally loose (e.g. ``s\\w*x`` also matches
        innocent words), so a hit only counts when an age-related term
        also appears somewhere in the prompt.
        """
        evasion_indicators = [
            r'p\w*is',
            r'p\w*sy',
            r's\w*x',
            r'n\w*d\w*',
        ]
        prompt_lower = prompt.lower()  # hoisted out of the pattern loop
        age_indicators = ['old', 'teen', 'baby', 'kid', 'child', 'minor', 'young']
        for pattern in evasion_indicators:
            if re.search(pattern, prompt, re.IGNORECASE):
                if any(indicator in prompt_lower for indicator in age_indicators):
                    return True
        return False

    def check_numerical_age_indicators(self, prompt: str) -> bool:
        """Fifth line: any number 3-17 combined with sexual indicators."""
        prompt_lower = prompt.lower()  # hoisted: was recomputed per number
        # NOTE(review): 'off' and 'insert' match as substrings ("office",
        # "inserted") — presumably intentionally broad; confirm.
        sexual_indicators = ['nude', 'naked', 'sex', 'porn', 'erotic', 'nudify', 'off', 'penis', 'cock', 'pussy', 'vagina', 'insert']
        if not any(indicator in prompt_lower for indicator in sexual_indicators):
            return False
        # Flag ages typically associated with minors.
        return any(3 <= int(num_str) <= 17 for num_str in re.findall(r'\b\d+\b', prompt))

    def check_contextual_red_flags(self, prompt: str) -> bool:
        """Sixth line: regex context patterns paired with suspicious words."""
        red_flag_contexts = [
            # Suspicious session types
            (r'(private|secret|hidden|exclusive).*photo', ['nude', 'naked', 'photos']),
            # Suspicious content descriptions
            (r'(special|unique|exclusive|view|angle|pov|shot).*content', ['young', 'minor', 'teen'])
        ]
        prompt_lower = prompt.lower()
        for context_pattern, suspicious_words in red_flag_contexts:
            if re.search(context_pattern, prompt_lower):
                if any(word in prompt_lower for word in suspicious_words):
                    return True
        return False

    def is_content_suspicious(self, prompt: str) -> Tuple[bool, str]:
        """Run every check in order; return (flagged, human-readable reason).

        A failing individual check is logged and skipped rather than
        aborting the whole scan.
        """
        checks = [
            (self.check_pattern_matching, "Pattern matching detected suspicious content"),
            (self.check_word_proximity, "Suspicious word proximity detected"),
            (self.check_suspicious_phrases, "Suspicious phrases detected"),
            (self.check_evasion_patterns, "Potential evasion patterns detected"),
            (self.check_numerical_age_indicators, "Suspicious age indicators with sexual content"),
            (self.check_contextual_red_flags, "Contextual red flags detected"),
            (self.simple_age_check, "Simple age check detected suspicious content"),
            (self.check_numerical_age_context, "Numerical age context check detected suspicious content"),
            (self.check_age_sexual_combinations, "Age-sexual combination check detected suspicious content")
        ]
        for check_func, message in checks:
            try:
                if check_func(prompt):
                    return True, message
            except Exception as e:
                print(f"Warning: Safety check {check_func.__name__} failed: {e}")
                continue
        return False, "Content appears safe"
# Enhanced safety function
def comprehensive_safety_check(prompt: str) -> Tuple[bool, str]:
    """Multi-layer safety checking with fallback mechanisms.

    Runs the primary :class:`ContentSafetyChecker` pass first, then a set
    of cheap secondary heuristics.  Any unexpected error blocks the
    prompt (fail closed).  Returns ``(blocked, reason)``.
    """
    try:
        # A fresh checker per call keeps every invocation stateless.
        checker = ContentSafetyChecker()

        flagged, detail = checker.is_content_suspicious(prompt)
        if flagged:
            return True, detail

        # Secondary heuristics, evaluated in order; each is best-effort.
        fallback_checks = (
            lambda p: len(p) > 1000,                  # unusually long prompt (obfuscation)
            lambda p: p.count('"') > 20,              # excessive quoting (injection)
            lambda p: '||' in p or '&&' in p,         # shell command operators
            lambda p: any(marker in p for marker in ('<script', 'javascript:', 'onload=')),  # basic XSS
        )
        for index, probe in enumerate(fallback_checks, start=1):
            try:
                if probe(prompt):
                    return True, f"Fallback safety check #{index} triggered"
            except Exception:
                continue

        return False, "All safety checks passed"
    except Exception as e:
        print(f"Safety check failed, erring on caution: {e}")
        return True, "Safety system error - content blocked for caution"
# Usage in your application
def test_prompt(original_prompt: str) -> str:
    """Gate a prompt through the safety check.

    Returns a pass-through confirmation when the prompt is safe, or a
    neutral fallback string when it is blocked (the block is also
    best-effort appended to ``safety_blocks.log``).
    """
    blocked, why = comprehensive_safety_check(original_prompt)
    if not blocked:
        # If safe, proceed with normal enhancement
        return f"Regex safety check passed: {original_prompt}"

    print(f"🚨 SAFETY BLOCK: {why} - Prompt: {original_prompt[:100]}...")
    # Log the blocked content (for monitoring); logging failure is non-fatal.
    try:
        with open("safety_blocks.log", "a") as f:
            f.write(f"{why}: {original_prompt}\n")
    except Exception:
        print("Warning: Could not write to safety log")
    return "A professional and appropriate image editing task"  # Safe fallback
import gradio as gr

with gr.Blocks(title="Content Safety Checker | Testing Zone") as demo:
    gr.Markdown("""
    # 🛡️ Regex-based Content Safety Checker
    Some of you guys need to seek help...
    """)

    with gr.Row():
        # Left column: prompt entry and action buttons.
        with gr.Column(scale=1):
            input_prompt = gr.Textbox(
                label="Test Prompt",
                placeholder="Type a prompt to check for safety...",
                lines=5,
                max_lines=10,
            )
            check_button = gr.Button("Check Safety", variant="primary")
            clear_button = gr.Button("Clear", variant="secondary")
        # Right column: result display and static explanation panel.
        with gr.Column(scale=2):
            output_result = gr.Textbox(
                label="Safety Check Result",
                interactive=False,
                lines=10,
                max_lines=15,
            )
            safety_info = gr.HTML(
                value=(
                    "<div style='padding:15px; margin-top:15px; background: #f8f9fa; border-radius: 8px;'>"
                    "<h4>ℹ️ How it works:</h4>"
                    "<ul>"
                    "<li>Multi-layer safety checking system</li>"
                    "<li>Detects age-related terms combined with sexual content</li>"
                    "<li>Identifies potential CSAM/illegal content patterns</li>"
                    "<li>Checks for evasion techniques and suspicious combinations</li>"
                    "</ul>"
                    "</div>"
                )
            )

    def process_prompt(prompt):
        """Run the safety pipeline and format a verdict string."""
        if not prompt or not prompt.strip():
            return "Please enter a prompt to test."
        is_blocked, reason = comprehensive_safety_check(prompt)
        verdict = "🚨 BLOCKED" if is_blocked else "✅ SAFE"
        return f"{verdict}: {reason}\n\nOriginal prompt: {prompt}"

    def clear_inputs():
        # One empty value per output component: (input_prompt, output_result).
        return "", ""

    check_button.click(fn=process_prompt, inputs=input_prompt, outputs=output_result)
    input_prompt.submit(fn=process_prompt, inputs=input_prompt, outputs=output_result)
    clear_button.click(fn=clear_inputs, inputs=None, outputs=[input_prompt, output_result])

demo.launch()