Spaces:

Omarrran
/

yolo_phone_deduction

Running

File size: 8,035 Bytes

"""
Phone Detection App for Hugging Face Spaces

This app uses YOLOv8 to detect phones in real-time through a webcam feed.
When a phone is detected, a warning message is displayed.
"""

import cv2
import numpy as np
import torch
import time
import os
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO

# Configurations
# Preferred weights file. PhoneDetector checks whether this path exists and
# switches to the stock "yolov8n.pt" weights when it does not.
MODEL_PATH = "models/yolov8n.pt"  # Path to the model within the repository
# Class name compared against the names reported by the model for each box.
TARGET_CLASS = "cell phone"
# Numeric class id accepted as an alternative to the name match above.
TARGET_CLASS_ID = 67  # In YOLOv8's COCO dataset
# Default value handed to the model's `conf` argument at prediction time.
MIN_CONFIDENCE = 0.4  # Minimum confidence threshold for detections

class PhoneDetector:
    """
    A class to handle phone detection using YOLOv8 model

    The model is loaded once at construction time. Each call to ``detect``
    annotates a single RGB frame: every detected phone gets a red bounding
    box with a confidence label, and a warning banner is drawn across the
    middle of the frame when at least one phone is present.
    """
    def __init__(self, model_path=MODEL_PATH, confidence=MIN_CONFIDENCE):
        """
        Initialize the phone detector

        Args:
            model_path: Path to the YOLOv8 model weights
            confidence: Minimum confidence threshold for detections
        """
        self.target_class = TARGET_CLASS
        self.target_class_id = TARGET_CLASS_ID
        self.min_confidence = confidence

        # Select device (GPU if available, otherwise CPU)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Check if model exists, otherwise use default YOLOv8n
        if not os.path.exists(model_path):
            print(f"Model not found at {model_path}, using default YOLOv8n")
            model_path = "yolov8n.pt"  # Will be downloaded automatically by YOLO

        # Load model; any failure with the requested weights falls back to the
        # stock YOLOv8n weights (a failure of the fallback itself propagates)
        try:
            print(f"Loading YOLOv8 model from {model_path}...")
            self.model = YOLO(model_path)
            self.model.to(self.device)
            print("Model loaded successfully")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Loading default YOLOv8n model...")
            self.model = YOLO("yolov8n.pt")
            self.model.to(self.device)

        # Fonts never change between frames, so load them once here instead
        # of hitting the font file on every call to detect()
        self._fonts = self._load_fonts()

    @staticmethod
    def _load_fonts():
        """
        Load the fonts used for annotations

        Returns:
            Tuple (warning_font, label_font); PIL's built-in default font is
            used for both when DejaVuSans.ttf cannot be opened
        """
        try:
            font = ImageFont.truetype("DejaVuSans.ttf", 25)
            small_font = ImageFont.truetype("DejaVuSans.ttf", 15)
        except IOError:
            font = ImageFont.load_default()
            small_font = ImageFont.load_default()
        return font, small_font

    def _get_fonts(self):
        """
        Return the cached (warning_font, label_font) pair, loading it on
        first use for instances whose __init__ did not set it
        """
        fonts = getattr(self, "_fonts", None)
        if fonts is None:
            fonts = self._fonts = self._load_fonts()
        return fonts

    @staticmethod
    def _draw_warning(pil_image, font):
        """
        Draw the centered warning banner

        Args:
            pil_image: RGB PIL image to decorate (not modified)
            font: Font used for the warning text

        Returns:
            New RGB PIL image with a semi-transparent red band and the
            warning text drawn over it
        """
        warning_text = "WARNING: Phone Detected!"
        (w, h) = pil_image.size

        # Measure text size for centering (implementation differs based on PIL version)
        try:
            # For newer PIL versions
            text_width = ImageDraw.Draw(pil_image).textlength(warning_text, font=font)
        except AttributeError:
            # For older PIL versions
            text_width = font.getmask(warning_text).getbbox()[2]

        text_x = (w - text_width) // 2
        text_y = h // 2

        # The band is drawn on a transparent RGBA layer and alpha-composited,
        # because drawing directly on an RGB image would ignore the alpha value
        overlay = Image.new('RGBA', pil_image.size, (0, 0, 0, 0))
        overlay_draw = ImageDraw.Draw(overlay)
        overlay_draw.rectangle([(0, text_y - 40), (w, text_y + 10)], fill=(255, 0, 0, 128))
        banner = Image.alpha_composite(pil_image.convert('RGBA'), overlay).convert('RGB')

        # Draw warning text
        ImageDraw.Draw(banner).text((text_x, text_y - 30), warning_text, fill="white", font=font)
        return banner

    def detect(self, frame):
        """
        Detect phones in a frame and add visualization

        Args:
            frame: Input image frame (numpy array) in RGB, RGBA or grayscale
                channel layout

        Returns:
            Processed RGB frame (numpy array) with detection visualization,
            or None when frame is None
        """
        if frame is None:
            return None

        # Convert to RGB if grayscale
        if len(frame.shape) == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        elif frame.shape[2] == 4:  # If RGBA
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

        # Get frame dimensions
        (h, w) = frame.shape[:2]

        # Convert to PIL Image for easier text rendering
        pil_image = Image.fromarray(frame)
        draw = ImageDraw.Draw(pil_image)

        font, small_font = self._get_fonts()

        # Ultralytics interprets numpy input as BGR (OpenCV convention), while
        # this frame is RGB. Hand the model a BGR copy so red and blue are not
        # swapped during inference; drawing still happens on the RGB image.
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Perform detection with YOLOv8
        with torch.no_grad():  # Disable gradient calculation for inference
            results = self.model.predict(bgr_frame, conf=self.min_confidence, verbose=False)

        # Flag to track if a phone is detected in this frame
        phone_detected = False

        # Process detection results
        for result in results:
            for box in result.boxes:
                # Get class ID
                cls_id = int(box.cls[0].item())
                class_name = result.names[cls_id]

                # Skip everything that is not a cell phone (matched by name or id)
                if class_name != self.target_class and cls_id != self.target_class_id:
                    continue

                phone_detected = True

                # Get confidence score
                conf = float(box.conf[0].item())

                # Get bounding box coordinates
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Draw bounding box on PIL image
                draw.rectangle([(x1, y1), (x2, y2)], outline="red", width=3)

                # Display confidence and class; the label goes above the box
                # unless that would place it too close to the top edge
                label = f"{class_name}: {conf:.2f}"
                y_label = y1 - 15 if y1 - 15 > 15 else y1 + 15
                draw.text((x1, y_label), label, fill="red", font=small_font)

        # Display warning message if phone is detected
        if phone_detected:
            pil_image = self._draw_warning(pil_image, font)
            # The banner is a new image, so the drawing handle must be renewed
            draw = ImageDraw.Draw(pil_image)

        # Add processing info at the bottom
        device_text = f"Running on: {self.device}"
        draw.text((10, h - 30), device_text, fill="green", font=small_font)

        # Convert back to numpy array
        result_frame = np.array(pil_image)

        return result_frame

# Initialize the detector
# Single module-level instance that process_webcam calls for every frame.
# Constructing it here means the YOLO model is loaded as soon as this module
# is executed.
detector = PhoneDetector()

# Function to process webcam frames
def process_webcam(image):
    """
    Run phone detection on a single frame coming from the Gradio webcam stream

    Args:
        image: Frame supplied by Gradio, or None when no frame is available

    Returns:
        The annotated frame produced by the detector; None when no input was
        given; the untouched input when the detector returned nothing
    """
    # Nothing to analyse without a frame
    if image is None:
        return None

    annotated = detector.detect(image)

    # Fall back to the raw input when the detector yields no output
    return image if annotated is None else annotated

# Create Gradio interface
# `title` is passed to gr.Blocks(title=...) below.
title = "Phone Detection with YOLOv8"
# `description` is Markdown text rendered at the top of the page via
# gr.Markdown(description); it is user-facing copy, not a docstring.
description = """
## Real-time Phone Detection

This app uses YOLOv8 to detect phones in real-time through your webcam.
When a phone is detected, a warning message is displayed.

### How it works:
1. The webcam captures your video feed
2. Each frame is analyzed by YOLOv8 to detect phones
3. If a phone is detected, a warning message appears

### Notes:
- You may need to give permission for camera access
- The app works best with good lighting conditions
- The model detects cell phones only
"""

# Create Gradio blocks interface
# Layout: intro Markdown, then one row with two columns (webcam input on the
# left, annotated result on the right), then a technical-details footer.
# Components are created in this order inside the context managers, so the
# statement order below defines the page layout.
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)
    
    with gr.Row():
        with gr.Column():
            # Webcam input with streaming
            # Only the webcam is offered as an image source.
            webcam_input = gr.Image(label="Webcam", sources=["webcam"], streaming=True)
        
        with gr.Column():
            # Output-only image that shows whatever process_webcam returns.
            output_display = gr.Image(label="Detection Result")
    
    # Stream processing
    # Registers process_webcam as the handler for the webcam stream: it
    # receives the value of webcam_input and its return value is shown in
    # output_display.
    webcam_input.stream(process_webcam, inputs=webcam_input, outputs=output_display)
    
    # NOTE: the target class and threshold in this text are hard-coded and
    # must be kept in sync with TARGET_CLASS / MIN_CONFIDENCE by hand.
    gr.Markdown("""
    ### Technical Details
    - Model: YOLOv8n (optimized for speed)
    - Target class: "cell phone"
    - Confidence threshold: 0.4
    
    This application was developed using Ultralytics YOLOv8, Gradio, and OpenCV.
    """)

# Launch the interface
# Runs unconditionally at module level (there is no __main__ guard), so
# executing or importing this module starts the app.
demo.launch()