"""
Phone Detection App for Hugging Face Spaces

This app uses YOLOv8 to detect phones in real-time through a webcam feed.
When a phone is detected, a warning message is displayed.
"""

import os
import time

import cv2
import numpy as np
import torch
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO

# Configurations
MODEL_PATH = "models/yolov8n.pt"  # Path to the model within the repository
TARGET_CLASS = "cell phone"
TARGET_CLASS_ID = 67  # In YOLOv8's COCO dataset
MIN_CONFIDENCE = 0.4  # Minimum confidence threshold for detections


class PhoneDetector:
    """
    A class to handle phone detection using YOLOv8 model
    """

    def __init__(self, model_path=MODEL_PATH, confidence=MIN_CONFIDENCE):
        """
        Initialize the phone detector

        Args:
            model_path: Path to the YOLOv8 model weights
            confidence: Minimum confidence threshold for detections
        """
        self.target_class = TARGET_CLASS
        self.target_class_id = TARGET_CLASS_ID
        self.min_confidence = confidence

        # Select device (GPU if available, otherwise CPU)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Check if model exists, otherwise use default YOLOv8n
        if not os.path.exists(model_path):
            print(f"Model not found at {model_path}, using default YOLOv8n")
            model_path = "yolov8n.pt"  # Will be downloaded automatically by YOLO

        # Load model
        try:
            print(f"Loading YOLOv8 model from {model_path}...")
            self.model = YOLO(model_path)
            self.model.to(self.device)
            print("Model loaded successfully")
        except Exception as e:
            # Best-effort fallback: any failure with the requested weights
            # falls back to the stock YOLOv8n weights.
            print(f"Error loading model: {e}")
            print("Loading default YOLOv8n model...")
            self.model = YOLO("yolov8n.pt")
            self.model.to(self.device)

        # Fonts are loaded once here instead of on every frame; detect() is
        # called for each streamed webcam frame.
        self.font, self.small_font = self._load_fonts()

    @staticmethod
    def _load_fonts():
        """Return (large, small) fonts, falling back to PIL's default font."""
        try:
            return (
                ImageFont.truetype("DejaVuSans.ttf", 25),
                ImageFont.truetype("DejaVuSans.ttf", 15),
            )
        except IOError:
            return ImageFont.load_default(), ImageFont.load_default()

    @staticmethod
    def _to_rgb(frame):
        """Return a 3-channel version of a grayscale, single-channel or RGBA frame."""
        if frame.ndim == 2 or (frame.ndim == 3 and frame.shape[2] == 1):
            return cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        if frame.shape[2] == 4:  # If RGBA
            return cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
        return frame

    def _is_target(self, class_name, cls_id):
        """Return True when a detection matches the target class name or ID."""
        return class_name == self.target_class or cls_id == self.target_class_id

    def _draw_warning(self, pil_image):
        """Return a copy of pil_image with the centered warning banner drawn on it."""
        w, h = pil_image.size
        warning_text = "WARNING: Phone Detected!"

        # Measure text size for centering (implementation differs based on PIL version)
        try:
            # For newer PIL versions
            text_width = ImageDraw.Draw(pil_image).textlength(warning_text, font=self.font)
        except AttributeError:
            # For older PIL versions
            text_width = self.font.getmask(warning_text).getbbox()[2]

        text_x = (w - text_width) // 2
        text_y = h // 2

        # Draw semi-transparent red rectangle for warning
        overlay = Image.new('RGBA', pil_image.size, (0, 0, 0, 0))
        ImageDraw.Draw(overlay).rectangle(
            [(0, text_y - 40), (w, text_y + 10)], fill=(255, 0, 0, 128)
        )
        banner_image = Image.alpha_composite(pil_image.convert('RGBA'), overlay).convert('RGB')

        # Draw warning text
        ImageDraw.Draw(banner_image).text(
            (text_x, text_y - 30), warning_text, fill="white", font=self.font
        )
        return banner_image

    def detect(self, frame):
        """
        Detect phones in a frame and add visualization

        Args:
            frame: Input image frame (numpy array); treated as RGB, or as
                grayscale / RGBA which are converted to RGB first

        Returns:
            Processed frame with detection visualization (RGB numpy array),
            or None if frame is None
        """
        if frame is None:
            return None

        frame = self._to_rgb(frame)
        h, w = frame.shape[:2]

        # Ultralytics interprets numpy input as BGR, while this frame is RGB.
        # Convert for inference only; all drawing below stays in RGB.
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Perform detection with YOLOv8
        with torch.no_grad():  # Disable gradient calculation for inference
            results = self.model.predict(bgr_frame, conf=self.min_confidence, verbose=False)

        # Convert to PIL Image for easier text rendering
        pil_image = Image.fromarray(frame)
        draw = ImageDraw.Draw(pil_image)

        # Flag to track if a phone is detected in this frame
        phone_detected = False

        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes:
                cls_id = int(box.cls[0].item())
                class_name = result.names[cls_id]
                if not self._is_target(class_name, cls_id):
                    continue

                phone_detected = True
                conf = float(box.conf[0].item())
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Draw bounding box on PIL image
                draw.rectangle([(x1, y1), (x2, y2)], outline="red", width=3)

                # Put the label above the box unless that would run off the top
                label = f"{class_name}: {conf:.2f}"
                y_label = y1 - 15 if y1 - 15 > 15 else y1 + 15
                draw.text((x1, y_label), label, fill="red", font=self.small_font)

        # Display warning message if phone is detected
        if phone_detected:
            pil_image = self._draw_warning(pil_image)
            draw = ImageDraw.Draw(pil_image)

        # Add processing info at the bottom
        device_text = f"Running on: {self.device}"
        draw.text((10, h - 30), device_text, fill="green", font=self.small_font)

        # Convert back to numpy array
        return np.array(pil_image)


# Initialize the detector
detector = PhoneDetector()


# Function to process webcam frames
def process_webcam(image):
    """
    Process webcam input for Gradio interface

    Args:
        image: Input image from Gradio

    Returns:
        Processed image with phone detection visualization; the input image
        is returned unchanged if the detector produces no frame
    """
    if image is None:
        return None

    # Process the frame
    result_frame = detector.detect(image)
    if result_frame is None:
        return image
    return result_frame


# Create Gradio interface
title = "Phone Detection with YOLOv8"
description = """
## Real-time Phone Detection

This app uses YOLOv8 to detect phones in real-time through your webcam.
When a phone is detected, a warning message is displayed.

### How it works:
1. The webcam captures your video feed
2. Each frame is analyzed by YOLOv8 to detect phones
3. If a phone is detected, a warning message appears

### Notes:
- You may need to give permission for camera access
- The app works best with good lighting conditions
- The model detects cell phones only
"""

# Built from the configuration constants so the text cannot drift from them.
technical_details = f"""
### Technical Details
- Model: YOLOv8n (optimized for speed)
- Target class: "{TARGET_CLASS}"
- Confidence threshold: {MIN_CONFIDENCE}

This application was developed using Ultralytics YOLOv8, Gradio, and OpenCV.
"""

# Create Gradio blocks interface
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            # Webcam input with streaming
            webcam_input = gr.Image(label="Webcam", sources=["webcam"], streaming=True)
        with gr.Column():
            output_display = gr.Image(label="Detection Result")

    # Stream processing
    webcam_input.stream(process_webcam, inputs=webcam_input, outputs=output_display)

    gr.Markdown(technical_details)

# Launch the interface only when run as a script, so importing this module
# (e.g. for reuse of PhoneDetector) does not start a server.
if __name__ == "__main__":
    demo.launch()