Spaces:
Running
Running
| """ | |
| Phone Detection App for Hugging Face Spaces | |
| This app uses YOLOv8 to detect phones in real-time through a webcam feed. | |
| When a phone is detected, a warning message is displayed. | |
| """ | |
| import cv2 | |
| import numpy as np | |
| import torch | |
| import time | |
| import os | |
| import gradio as gr | |
| from PIL import Image, ImageDraw, ImageFont | |
| from ultralytics import YOLO | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Location of the YOLOv8 weights inside the repository.
MODEL_PATH: str = "models/yolov8n.pt"
# Label and numeric id of the class to flag (id is the COCO index used by YOLOv8).
TARGET_CLASS: str = "cell phone"
TARGET_CLASS_ID: int = 67
# Detections scoring below this confidence are ignored.
MIN_CONFIDENCE: float = 0.4
class PhoneDetector:
    """Detect cell phones in image frames with a YOLOv8 model and annotate them."""

    # Font file and point sizes used for the overlay text.
    _FONT_FILE = "DejaVuSans.ttf"
    _WARNING_FONT_SIZE = 25
    _LABEL_FONT_SIZE = 15

    def __init__(self, model_path=MODEL_PATH, confidence=MIN_CONFIDENCE):
        """Load the model and prepare everything that is constant across frames.

        Args:
            model_path: Path to the YOLOv8 model weights. If the path does not
                exist, "yolov8n.pt" is used instead (Ultralytics downloads it).
            confidence: Minimum confidence threshold for detections.
        """
        self.target_class = TARGET_CLASS
        self.target_class_id = TARGET_CLASS_ID
        self.min_confidence = confidence

        # Select device (GPU if available, otherwise CPU).
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Fonts never change between frames, so load them once here instead
        # of hitting the filesystem on every streamed frame.
        self.font, self.small_font = self._load_fonts()

        # Check if model exists, otherwise use default YOLOv8n.
        if not os.path.exists(model_path):
            print(f"Model not found at {model_path}, using default YOLOv8n")
            model_path = "yolov8n.pt"  # Will be downloaded automatically by YOLO

        # Load model; on any failure fall back to the default weights.
        try:
            print(f"Loading YOLOv8 model from {model_path}...")
            self.model = YOLO(model_path)
            self.model.to(self.device)
            print("Model loaded successfully")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Loading default YOLOv8n model...")
            self.model = YOLO("yolov8n.pt")
            self.model.to(self.device)

    @classmethod
    def _load_fonts(cls):
        """Return ``(warning_font, label_font)``, using PIL's default font if the TTF is missing."""
        try:
            return (
                ImageFont.truetype(cls._FONT_FILE, cls._WARNING_FONT_SIZE),
                ImageFont.truetype(cls._FONT_FILE, cls._LABEL_FONT_SIZE),
            )
        except IOError:
            return ImageFont.load_default(), ImageFont.load_default()

    def _draw_warning(self, pil_image, w, h):
        """Return a copy of ``pil_image`` with a translucent red banner and warning text."""
        warning_text = "WARNING: Phone Detected!"

        # Measure text width for centering (API differs between PIL versions).
        try:
            # For newer PIL versions
            text_width = ImageDraw.Draw(pil_image).textlength(warning_text, font=self.font)
        except AttributeError:
            # For older PIL versions
            text_width = self.font.getmask(warning_text).getbbox()[2]
        text_x = (w - text_width) // 2
        text_y = h // 2

        # The banner is drawn on a transparent RGBA layer and alpha-composited,
        # because drawing directly on an RGB image would ignore the alpha value.
        overlay = Image.new('RGBA', pil_image.size, (0, 0, 0, 0))
        ImageDraw.Draw(overlay).rectangle(
            [(0, text_y - 40), (w, text_y + 10)], fill=(255, 0, 0, 128)
        )
        banner_image = Image.alpha_composite(pil_image.convert('RGBA'), overlay).convert('RGB')

        ImageDraw.Draw(banner_image).text(
            (text_x, text_y - 30), warning_text, fill="white", font=self.font
        )
        return banner_image

    def detect(self, frame):
        """Detect phones in a frame and draw the detections onto it.

        Args:
            frame: Input image frame (numpy array). It is handled as RGB:
                grayscale and RGBA inputs are converted to 3-channel RGB.

        Returns:
            The annotated frame as a numpy array, or None if ``frame`` is None.
        """
        if frame is None:
            return None

        # Normalize to 3-channel RGB.
        if len(frame.shape) == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        elif frame.shape[2] == 4:  # If RGBA
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

        h, w = frame.shape[:2]

        # Convert to PIL Image for easier text rendering.
        pil_image = Image.fromarray(frame)
        draw = ImageDraw.Draw(pil_image)

        # Ultralytics documents numpy inputs as BGR (OpenCV convention), while
        # this frame is RGB. Hand the model a BGR copy so the colour channels
        # are not swapped at inference; drawing still happens on the RGB image.
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        with torch.no_grad():  # Disable gradient calculation for inference
            results = self.model.predict(bgr_frame, conf=self.min_confidence, verbose=False)

        # Flag to track if a phone is detected in this frame.
        phone_detected = False

        for result in results:
            for box in result.boxes:
                cls_id = int(box.cls[0].item())
                class_name = result.names[cls_id]

                # Only the target class (matched by name or by id) is reported.
                if class_name != self.target_class and cls_id != self.target_class_id:
                    continue
                phone_detected = True

                conf = float(box.conf[0].item())
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                draw.rectangle([(x1, y1), (x2, y2)], outline="red", width=3)

                # Put the label above the box, or just inside it when the box
                # is too close to the top edge for the label to fit above.
                label = f"{class_name}: {conf:.2f}"
                y_label = y1 - 15 if y1 - 15 > 15 else y1 + 15
                draw.text((x1, y_label), label, fill="red", font=self.small_font)

        # Display warning message if phone is detected.
        if phone_detected:
            pil_image = self._draw_warning(pil_image, w, h)
            # Compositing produced a new image, so the drawing handle must be rebuilt.
            draw = ImageDraw.Draw(pil_image)

        # Add processing info at the bottom.
        device_text = f"Running on: {self.device}"
        draw.text((10, h - 30), device_text, fill="green", font=self.small_font)

        # Convert back to numpy array.
        return np.array(pil_image)
# Module-wide detector instance, built once at import time with the default
# configuration and reused by process_webcam for every frame.
detector = PhoneDetector()
| # Function to process webcam frames | |
def process_webcam(image):
    """Run phone detection on one webcam frame for the Gradio interface.

    Args:
        image: Input image from Gradio, or None when no frame is available.

    Returns:
        The annotated frame from the detector. None when ``image`` is None;
        the unmodified ``image`` when the detector returns None.
    """
    if image is None:
        return None

    annotated = detector.detect(image)
    # Fall back to the raw frame rather than blanking the output.
    return image if annotated is None else annotated
# Create Gradio interface
title = "Phone Detection with YOLOv8"
description = """
## Real-time Phone Detection
This app uses YOLOv8 to detect phones in real-time through your webcam.
When a phone is detected, a warning message is displayed.
### How it works:
1. The webcam captures your video feed
2. Each frame is analyzed by YOLOv8 to detect phones
3. If a phone is detected, a warning message appears
### Notes:
- You may need to give permission for camera access
- The app works best with good lighting conditions
- The model detects cell phones only
"""

# Built from the configuration constants so the displayed target class and
# confidence threshold cannot drift from the values the detector actually uses.
technical_details = f"""
### Technical Details
- Model: YOLOv8n (optimized for speed)
- Target class: "{TARGET_CLASS}"
- Confidence threshold: {MIN_CONFIDENCE}
This application was developed using Ultralytics YOLOv8, Gradio, and OpenCV.
"""

# Create Gradio blocks interface: webcam input on the left, annotated result
# on the right, technical notes underneath.
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            # Webcam input with streaming
            webcam_input = gr.Image(label="Webcam", sources=["webcam"], streaming=True)
        with gr.Column():
            output_display = gr.Image(label="Detection Result")
    # Stream processing: every streamed frame is passed through process_webcam.
    webcam_input.stream(process_webcam, inputs=webcam_input, outputs=output_display)
    gr.Markdown(technical_details)

# Launch the interface
demo.launch()