Spaces:
Running
Running
File size: 8,035 Bytes
d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 d02ce50 ce0c645 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
"""
Phone Detection App for Hugging Face Spaces
This app uses YOLOv8 to detect phones in real-time through a webcam feed.
When a phone is detected, a warning message is displayed.
"""
import cv2
import numpy as np
import torch
import time
import os
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO
# Configurations
# Path to the model within the repository; PhoneDetector switches to the stock
# "yolov8n.pt" weights when no file exists at this path.
MODEL_PATH: str = "models/yolov8n.pt"
# Label name a detection must carry to count as a phone.
TARGET_CLASS: str = "cell phone"
# Class index of "cell phone" in YOLOv8's COCO dataset; accepted as an
# alternative to the label-name match.
TARGET_CLASS_ID: int = 67
# Minimum confidence threshold for detections
MIN_CONFIDENCE: float = 0.4
class PhoneDetector:
    """
    Detect cell phones in image frames with a YOLOv8 model and annotate them.

    The model and the overlay fonts are loaded once in the constructor so that
    ``detect`` only performs inference and drawing for each frame.
    """

    # Font file and point sizes used for the overlay text.
    _FONT_FILE = "DejaVuSans.ttf"
    _WARNING_FONT_SIZE = 25
    _LABEL_FONT_SIZE = 15

    def __init__(self, model_path=MODEL_PATH, confidence=MIN_CONFIDENCE):
        """
        Initialize the phone detector

        Args:
            model_path: Path to the YOLOv8 model weights. When no file exists
                at this path, "yolov8n.pt" is used instead.
            confidence: Minimum confidence threshold for detections
        """
        self.target_class = TARGET_CLASS
        self.target_class_id = TARGET_CLASS_ID
        self.min_confidence = confidence

        # Select device (GPU if available, otherwise CPU)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Check if model exists, otherwise use default YOLOv8n
        if not os.path.exists(model_path):
            print(f"Model not found at {model_path}, using default YOLOv8n")
            model_path = "yolov8n.pt"  # Will be downloaded automatically by YOLO

        # Load model; any failure is treated as best-effort and answered by
        # retrying with the default weights.
        try:
            print(f"Loading YOLOv8 model from {model_path}...")
            self.model = YOLO(model_path)
            self.model.to(self.device)
            print("Model loaded successfully")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Loading default YOLOv8n model...")
            self.model = YOLO("yolov8n.pt")
            self.model.to(self.device)

        # Fonts do not change between frames, so load them once here instead
        # of hitting the filesystem on every call to detect().
        self._warning_font, self._label_font = self._load_fonts()

    @classmethod
    def _load_fonts(cls):
        """Return (warning_font, label_font), using PIL's default font if the TTF is unavailable."""
        try:
            return (
                ImageFont.truetype(cls._FONT_FILE, cls._WARNING_FONT_SIZE),
                ImageFont.truetype(cls._FONT_FILE, cls._LABEL_FONT_SIZE),
            )
        except IOError:
            return ImageFont.load_default(), ImageFont.load_default()

    @staticmethod
    def _to_rgb(frame):
        """Return a 3-channel RGB version of a grayscale (2-D) or RGBA (4-channel) frame."""
        if len(frame.shape) == 2:
            return cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        if frame.shape[2] == 4:
            return cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
        return frame

    def _is_phone(self, class_name, cls_id):
        """Return True when a detection matches the target class by name or by id."""
        return class_name == self.target_class or cls_id == self.target_class_id

    def _draw_warning(self, pil_image, draw, width, height):
        """Return a new RGB image with a translucent red banner and centered warning text."""
        warning_text = "WARNING: Phone Detected!"

        # Measure text size for centering (implementation differs based on PIL version)
        try:
            # For newer PIL versions
            text_width = draw.textlength(warning_text, font=self._warning_font)
        except AttributeError:
            # For older PIL versions
            text_width = self._warning_font.getmask(warning_text).getbbox()[2]
        text_x = (width - text_width) // 2
        text_y = height // 2

        # Semi-transparency needs an alpha channel, so the banner is painted
        # on a separate RGBA layer and composited onto the frame.
        overlay = Image.new('RGBA', pil_image.size, (0, 0, 0, 0))
        ImageDraw.Draw(overlay).rectangle(
            [(0, text_y - 40), (width, text_y + 10)], fill=(255, 0, 0, 128)
        )
        banner_image = Image.alpha_composite(pil_image.convert('RGBA'), overlay).convert('RGB')

        # Draw warning text
        ImageDraw.Draw(banner_image).text(
            (text_x, text_y - 30), warning_text, fill="white", font=self._warning_font
        )
        return banner_image

    def detect(self, frame):
        """
        Detect phones in a frame and add visualization

        Args:
            frame: Input image frame (numpy array). It is treated as RGB;
                grayscale and RGBA frames are converted to RGB first.

        Returns:
            Processed frame with detection visualization as an RGB numpy
            array, or None when ``frame`` is None.
        """
        if frame is None:
            return None

        frame = self._to_rgb(frame)
        h, w = frame.shape[:2]

        # Convert to PIL Image for easier text rendering
        pil_image = Image.fromarray(frame)
        draw = ImageDraw.Draw(pil_image)

        # Ultralytics interprets numpy input as BGR (OpenCV convention), while
        # this frame is RGB; swap channels for inference only so the model
        # does not see red and blue exchanged.
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        with torch.no_grad():  # Disable gradient calculation for inference
            results = self.model.predict(bgr_frame, conf=self.min_confidence, verbose=False)

        # Flag to track if a phone is detected in this frame
        phone_detected = False
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes:
                cls_id = int(box.cls[0].item())
                class_name = result.names[cls_id]
                if not self._is_phone(class_name, cls_id):
                    continue
                phone_detected = True

                conf = float(box.conf[0].item())
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Draw bounding box on PIL image
                draw.rectangle([(x1, y1), (x2, y2)], outline="red", width=3)

                # Put the label above the box, or below its top edge when
                # there is not enough room above.
                label = f"{class_name}: {conf:.2f}"
                y_label = y1 - 15 if y1 - 15 > 15 else y1 + 15
                draw.text((x1, y_label), label, fill="red", font=self._label_font)

        # Display warning message if phone is detected
        if phone_detected:
            pil_image = self._draw_warning(pil_image, draw, w, h)
            # The banner step produced a new image, so rebind the drawing context.
            draw = ImageDraw.Draw(pil_image)

        # Add processing info at the bottom
        device_text = f"Running on: {self.device}"
        draw.text((10, h - 30), device_text, fill="green", font=self._label_font)

        # Convert back to numpy array
        return np.array(pil_image)
# Initialize the detector
# Created once at module level so the YOLO weights are loaded a single time
# and the same instance is reused by process_webcam for every frame.
detector = PhoneDetector()
# Function to process webcam frames
def process_webcam(image):
    """
    Gradio callback: run phone detection on one webcam frame.

    Args:
        image: Input image from Gradio (may be None when no frame is available)

    Returns:
        The annotated frame from the detector; the unmodified input when the
        detector yields nothing; None when no image was supplied.
    """
    # Nothing to do until the webcam delivers a frame.
    if image is None:
        return None

    annotated = detector.detect(image)
    # Fall back to the raw frame so the output pane never goes blank.
    return image if annotated is None else annotated
# Create Gradio interface
# Page title handed to gr.Blocks.
title = "Phone Detection with YOLOv8"
# Markdown introduction rendered at the top of the page via gr.Markdown.
description = """
## Real-time Phone Detection
This app uses YOLOv8 to detect phones in real-time through your webcam.
When a phone is detected, a warning message is displayed.
### How it works:
1. The webcam captures your video feed
2. Each frame is analyzed by YOLOv8 to detect phones
3. If a phone is detected, a warning message appears
### Notes:
- You may need to give permission for camera access
- The app works best with good lighting conditions
- The model detects cell phones only
"""
# Create Gradio blocks interface
# Technical summary shown below the video panes. The class name and threshold
# are read from the live detector so this text cannot drift from the values
# actually used for inference.
technical_details = f"""
### Technical Details
- Model: YOLOv8n (optimized for speed)
- Target class: "{detector.target_class}"
- Confidence threshold: {detector.min_confidence}
This application was developed using Ultralytics YOLOv8, Gradio, and OpenCV.
"""

with gr.Blocks(title=title) as demo:
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            # Webcam input with streaming
            webcam_input = gr.Image(label="Webcam", sources=["webcam"], streaming=True)
        with gr.Column():
            output_display = gr.Image(label="Detection Result")
    # Stream processing: every streamed frame goes through process_webcam and
    # the annotated result is shown in the output pane.
    webcam_input.stream(process_webcam, inputs=webcam_input, outputs=output_display)
    gr.Markdown(technical_details)

# Launch the interface only when executed as a script, so importing this
# module (e.g. for reuse or testing) does not start a web server.
if __name__ == "__main__":
    demo.launch()