Spaces:

NHLOCAL
/

Kav-Venaki

Runtime error

App Files Files Community

NHLOCAL committed on Jan 5, 2025

Commit

af8184f

1 Parent(s): 9de88c1

Add application file

Browse files

Files changed (6) hide show

__pycache__/backend.cpython-310.pyc +0 -0
__pycache__/webui.cpython-310.pyc +0 -0
app.py +63 -0
backend.py +321 -0
example_images/example.jpg +0 -0
requirements.txt +10 -0

__pycache__/backend.cpython-310.pyc ADDED Viewed

Binary file (8.34 kB). View file

__pycache__/webui.cpython-310.pyc ADDED Viewed

Binary file (2.09 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import gradio as gr
+from PIL import Image
+from backend import process_image
+def inference(image: Image.Image, gemini_api_key: str):
+    """
+    פונקציה שמבצעת זיהוי וטשטוש נשים בתמונה,
+    ומעדכנת את סרגל ההתקדמות בהתאם.
+    """
+    if not gemini_api_key.strip():
+        raise gr.Error("אנא הכנס/י מפתח API של Gemini על מנת להמשיך.")
+    progress = gr.Progress()  # אובייקט לעדכון ההתקדמות
+    def progress_callback(fraction, description=""):
+        """
+        פונקציה פנימית שתיקרא מ-backend בכל שלב.
+        fraction - ערך בין 0 ל-1 (לדוגמה 0.3 = 30%)
+        description - מלל להסבר השלב
+        """
+        progress(fraction, desc=description)
+    # כעת נקרא ל-process_image עם אפשרות לעדכן התקדמות
+    result_image = process_image(image, gemini_api_key, progress_callback=progress_callback)
+    return result_image
+# Title passed to the Gradio interface (Hebrew UI text).
+title_str = "זיהוי וטשטוש נשים בתמונה"
+# HTML description passed to the interface; the inline style makes the
+# paragraph right-aligned and right-to-left.
+description_str = """<p style='text-align: right; direction: rtl'>
+    העלה תמונה, הכנס את מפתח ה־API של Gemini,<br>
+    ולחץ על "הרץ" כדי לזהות ולטשטש נשים בתמונה באופן אוטומטי.
+</p>
+"""
+# Path to the example image
+EXAMPLE_IMAGE = "example_images/example.jpg"
+# Gradio interface: two inputs (a PIL image and a password-style text box for
+# the Gemini API key) and one PIL image output, all handled by `inference`.
+demo = gr.Interface(
+    fn=inference,
+    inputs=[
+        gr.Image(type="pil", label="בחר/י תמונה לניתוח או גרור/י אותה לכאן"),
+        gr.Textbox(
+            label="מפתח API של Gemini",
+            placeholder="הכנס/י את מפתח ה-API שלך כאן",
+            type="password"
+        )
+    ],
+    outputs=gr.Image(type="pil", label="תוצאה סופית"),
+    title=title_str,
+    description=description_str,
+    examples=[
+        [EXAMPLE_IMAGE]  # image only, no API key
+    ],
+    allow_flagging="never",
+    theme="compact"  # lightweight look for the interface
+)
+if __name__ == "__main__":
+    # ניתן להגדיר share=True אם רוצים לשתף מחוץ לרשת המקומית
+    demo.launch(server_name="127.0.0.1", server_port=7860, debug=True)

backend.py ADDED Viewed

	@@ -0,0 +1,321 @@

+import os
+import base64
+import json
+import requests
+import torch
+import numpy as np
+import cv2
+from PIL import Image, ImageFilter
+from scipy.ndimage import binary_dilation
+# -----------------------------
+# 1) Defining the Gemini API key as a parameter
+#    (the key is passed as an argument to the functions below; this section
+#    holds the system instruction text sent with every Gemini request)
+# -----------------------------
+SYSTEM_INST = """\
+You are given an image. You must return information about the main character in the image.
+Do not write anything else beyond this!
+**Guidelines for identifying a character in the image:**
+1. **Male:**
+   - Infant (0–2) → "baby boy"
+   - Toddler (2–5) → "toddler boy"
+   - Child (6–11) → "boy"
+   - Teenager (12–17) → "teen boy"
+   - Young adul (18–35) → "young man"
+   - adul (36–59) → "man"
+   - Elderly (60+) → "elderly man"
+2. **Female:**
+   - Infant (0–2) → "baby girl"
+   - Toddler (2–5) → "toddler girl"
+   - Child (6–11) → "girl"
+   - Teenager (12–17) → "teen girl"
+   - Young adul (18–35) → "young woman"
+   - adul (36–59) → "woman"
+   - Elderly (60+) → "elderly woman"
+3. **Unclear identification:**
+   - Ambiguous character → "unidentified"
+   - Ambiguous infant/toddler → "baby" or "toddler"
+4. **No character in the image:**
+   - Respond: "no person"
+5. **Multiple characters:**
+   - Identify the most central or prominent character.
+Notes:
+- If data is insufficient to classify → "insufficient data".
+"""
+# The current conversation is stored here: a module-level list of turns that
+# the helper functions below append to and that is sent as the request body.
+# NOTE(review): being module-level, it looks shared by every caller in the
+# process — confirm this is acceptable if requests can run concurrently.
+conversation = []  # the current conversation is kept here
+# Labels from SYSTEM_INST that denote a female character.
+female_keywords = {
+    "baby girl", "toddler girl", "girl",
+    "teen girl", "young woman", "woman",
+    "elderly woman"
+}
+def is_female_from_text(gemini_text: str) -> bool:
+    """בודק האם התשובה מ-Gemini מצביעה על אישה לפי מילות המפתח שהוגדרו."""
+    return gemini_text.lower().strip() in female_keywords
+def encode_image_to_base64(image: Image.Image) -> str:
+    import io
+    buffer = io.BytesIO()
+    image.save(buffer, format='JPEG')
+    encoded_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
+    return encoded_str
+def add_user_text(message: str):
+    conversation.append({
+        "role": "user",
+        "parts": [
+            {"text": message}
+        ]
+    })
+def add_user_image_from_pil(image: Image.Image, mime_type: str = "image/jpeg"):
+    encoded_str = encode_image_to_base64(image)
+    conversation.append({
+        "role": "user",
+        "parts": [
+            {
+                "inline_data": {
+                    "mime_type": mime_type,
+                    "data": encoded_str
+                }
+            }
+        ]
+    })
+def send_and_receive(api_key: str) -> str:
+    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
+    params = {"key": api_key}
+    headers = {"Content-Type": "application/json"}
+    payload = {
+        "systemInstruction": {
+            "role": "system",
+            "parts": [
+                {"text": SYSTEM_INST}
+            ]
+        },
+        "contents": conversation
+    }
+    response = requests.post(url, params=params, headers=headers, json=payload)
+    if response.status_code != 200:
+        print(f"[Gemini] שגיאה בסטטוס קוד: {response.status_code}")
+        return "NO_ANSWER"
+    resp_json = response.json()
+    candidates = resp_json.get("candidates", [])
+    if not candidates:
+        print("[Gemini] לא התקבלה תשובה.")
+        return "NO_ANSWER"
+    model_content = candidates[0].get("content", {})
+    model_parts = model_content.get("parts", [])
+    if not model_parts:
+        print("[Gemini] לא נמצא תוכן בתשובת המודל.")
+        return "NO_ANSWER"
+    model_text = model_parts[0].get("text", "").strip()
+    conversation.append({
+        "role": "model",
+        "parts": [
+            {"text": model_text}
+        ]
+    })
+    return model_text
+# -----------------------------
+# 3) טעינת מודל YOLO
+# -----------------------------
+from ultralytics import YOLO
+YOLO_MODEL_PATH =  '../../models/yolo11m.pt'
+try:
+    yolo_model = YOLO(YOLO_MODEL_PATH)
+    yolo_model.to("cpu")
+except Exception as e:
+    print(f"[YOLO] לא מצליח לטעון את המודל בנתיב: {YOLO_MODEL_PATH}")
+    yolo_model = None
+TARGET_CLASS = "person"
+CONF_THRESHOLD = 0.2
+# -----------------------------
+# 4) הכנה ל-SAM2
+# -----------------------------
+try:
+    from hydra import initialize
+    from sam2.sam2_image_predictor import SAM2ImagePredictor
+    SAM2_CONFIG_PATH = "../../models/sam2.1/"
+    SAM2_MODEL_NAME = "facebook/sam2.1-hiera-tiny"
+    sam2_predictor = None
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    with initialize(config_path=SAM2_CONFIG_PATH):
+        sam2_predictor = SAM2ImagePredictor.from_pretrained(SAM2_MODEL_NAME)
+    sam2_predictor.model.to(device)
+except Exception as e:
+    print("[SAM2] לא מצליח לטעון את SAM2. ודא שהנתיב והקונפיג נכונים.")
+    sam2_predictor = None
+# -----------------------------
+# 5) פונקציית טשטוש
+# -----------------------------
+def blur_regions_with_mask(
+    image: Image.Image,
+    mask: np.ndarray,
+    blur_radius=20,
+    pixel_size=20,
+    expansion_pixels=1
+):
+    processed_image = image.copy()
+    img_np = np.array(processed_image)
+    structure = np.ones((expansion_pixels, expansion_pixels), dtype=bool)
+    expanded_mask = binary_dilation(mask, structure=structure)
+    blurred_whole = processed_image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
+    blurred_whole_np = np.array(blurred_whole)
+    ys, xs = np.where(expanded_mask)
+    if len(xs) == 0 or len(ys) == 0:
+        return processed_image
+    x_min, x_max = xs.min(), xs.max()
+    y_min, y_max = ys.min(), ys.max()
+    region = blurred_whole_np[y_min:y_max, x_min:x_max]
+    from PIL import Image as PILImage
+    small = PILImage.fromarray(region).resize(
+        ((x_max - x_min) // pixel_size, (y_max - y_min) // pixel_size),
+        resample=Image.BILINEAR
+    )
+    pixelated = small.resize((x_max - x_min, y_max - y_min), PILImage.NEAREST)
+    pixelated_np = np.array(pixelated)
+    combined = img_np.copy()
+    mask_region = expanded_mask[y_min:y_max, x_min:x_max]
+    combined[y_min:y_max, x_min:x_max][mask_region] = pixelated_np[mask_region]
+    return Image.fromarray(combined)
+# -----------------------------
+# 6) הפונקציה המרכזית
+# -----------------------------
+def process_image(
+    pil_image: Image.Image,
+    gemini_api_key: str,
+    progress_callback=None
+) -> Image.Image:
+    """
+    פונקציה המקבלת תמונת PIL, מפתח API של Gemini, ומחזירה את התמונה לאחר טשטוש נשים,
+    תוך שלבי התקדמות מוגדרים:
+      - זיהוי אנשים ב-YOLO
+      - זיהוי אם אישה בעזרת Gemini
+      - פילוח באמצעות SAM2
+      - טשטוש
+    פרמטר progress_callback: פונקציה לקבלת (fraction, description)
+    """
+    if progress_callback is None:
+        # אם לא הועברה פונקציה לעדכון התקדמות, ניצור פונקציה ריקה
+        def progress_callback(x, desc=""):
+            pass
+    conversation.clear()
+    add_user_text("Processing a new image (backend)!")
+    # 1) שלב YOLO
+    progress_callback(0.0, "מתחיל זיהוי אנשים (YOLO)...")
+    if yolo_model is None:
+        print("[process_image] מודל YOLO לא נטען כראוי.")
+        return pil_image
+    np_image = np.array(pil_image)
+    results = yolo_model.predict(np_image)
+    bboxes_person = []
+    for result in results:
+        boxes = result.boxes
+        for box in boxes:
+            cls_name = yolo_model.names[int(box.cls)]
+            conf = box.conf.item()
+            if cls_name == TARGET_CLASS and conf >= CONF_THRESHOLD:
+                x1, y1, x2, y2 = box.xyxy[0]
+                bboxes_person.append([int(x1), int(y1), int(x2), int(y2)])
+    progress_callback(0.1, f"נמצאו {len(bboxes_person)} בוקסי 'person' ב-YOLO")
+    # 2) שלב Gemini (עבור כל בוקס בנפרד)
+    women_boxes = []
+    n_bboxes = len(bboxes_person) if bboxes_person else 1
+    for i, bbox in enumerate(bboxes_person, start=1):
+        fraction = 0.1 + (0.5 * i / n_bboxes)  # נניח חצי מההתקדמות מוקצה ל-Gemini
+        progress_callback(fraction, f"[Gemini] בודק בוקס #{i} מתוך {len(bboxes_person)}")
+        x1, y1, x2, y2 = bbox
+        cropped = pil_image.crop((x1, y1, x2, y2))
+        add_user_image_from_pil(cropped)
+        add_user_text("---")
+        gemini_text = send_and_receive(gemini_api_key)
+        if is_female_from_text(gemini_text):
+            women_boxes.append(bbox)
+    # 3) שלב SAM2 (עבור בוקסים של נשים)
+    if sam2_predictor is None:
+        print("[process_image] SAM2 לא זמין/נטען. מחזירים תמונה ללא טשטוש.")
+        return pil_image
+    progress_callback(0.6, f"מתחיל פילוח SAM2 על {len(women_boxes)} נשים...")
+    sam2_predictor.set_image(np.array(pil_image))
+    women_masks = []
+    n_women = len(women_boxes) if women_boxes else 1
+    for j, bbox in enumerate(women_boxes, start=1):
+        fraction = 0.6 + (0.3 * j / n_women)  # עדכון עד 90%
+        progress_callback(fraction, f"[SAM2] מפלח בוקס #{j} מתוך {len(women_boxes)}")
+        box_np = np.array([bbox])
+        masks, scores, _ = sam2_predictor.predict(
+            point_coords=None,
+            point_labels=None,
+            box=box_np,
+            multimask_output=False,
+        )
+        if masks.ndim == 4 and masks.shape[1] == 1:
+            mask = masks.squeeze(1)[0].astype(bool)
+        elif masks.ndim == 3:
+            mask = masks[0].astype(bool)
+        else:
+            raise ValueError(f"[SAM2] צורת masks לא צפויה: {masks.shape}")
+        women_masks.append((bbox, mask))
+    # 4) שלב טשטוש
+    progress_callback(0.9, "מתחיל טשטוש האזורים המזוהים (Blur + פיקסול)...")
+    final_image = pil_image.copy()
+    for (bbox, mask) in women_masks:
+        final_image = blur_regions_with_mask(final_image, mask)
+    progress_callback(1.0, "סיימנו! מחזירים את התוצאה הסופית.")
+    return final_image

example_images/example.jpg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+gradio
+torch
+numpy
+opencv-python
+Pillow
+requests
+ultralytics
+scipy
+hydra-core
+git+https://github.com/facebookresearch/sam2.git