Spaces:

Krokodilpirat
/

Video-Depth-Anything_RGBD_Zero

Running on Zero

App Files Files Community

Krokodilpirat committed on Jul 14

Commit

dd8dbe1

verified ·

1 Parent(s): 3ab8117

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -5

app.py CHANGED Viewed

@@ -1,25 +1,104 @@
 import os
 import sys
 import gc
 import cv2
 import torch
 import numpy as np
 import gradio as gr
 import subprocess
 import requests
 from urllib.parse import urlparse
 from huggingface_hub import hf_hub_download
-from video_depth_anything.video_depth import VideoDepthAnything
-from utils.dc_utils import read_video_frames, save_video
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
 # --- Environment setup ---
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
 os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
 # --- Patch Gradio schema bug ---
 def patch_gradio_utils():
     """Fix Gradio schema type checking bug"""
     try:
@@ -34,16 +113,23 @@ def patch_gradio_utils():
             return original_get_type(schema)
         utils.get_type = patched_get_type
-        print("Successfully patched Gradio utils.get_type")
     except Exception as e:
-        print(f"Could not patch Gradio utils: {e}")
 patch_gradio_utils()
 # --- Load BLIP model ---
 print("Loading BLIP model...")
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cpu")
 def get_first_frame_for_blip(video_path, target_size=480):
     """Effizient: Lädt nur das erste Frame für BLIP (nicht alle Frames!)"""
@@ -353,19 +439,36 @@ def embed_thumbnail_in_video(video_path, thumbnail_array, base_name):
         return False
 # --- Load depth model ---
 print("Loading Video Depth Anything model...")
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 encoder = 'vitl'
 model_name = 'Large'
 model_configs = {
     'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
 }
 video_depth_anything = VideoDepthAnything(**model_configs[encoder])
 ckpt_path = hf_hub_download(repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
                             filename=f"video_depth_anything_{encoder}.pth",
                             cache_dir="/tmp/huggingface")
 video_depth_anything.load_state_dict(torch.load(ckpt_path, map_location='cpu'))
 video_depth_anything = video_depth_anything.to(DEVICE).eval()
 # --- URL validation and download ---
 def validate_url(url):
@@ -421,7 +524,6 @@ def download_video_with_ytdlp(url):
         raise RuntimeError("yt-dlp Python module not installed. Install with: pip install yt-dlp")
     except Exception as e:
         raise RuntimeError(f"Failed to download with yt-dlp: {e}")
 def detect_video_source(url):
     """Detect video source and determine download method"""
     # Known platforms with special handling (priority check first)

+# ==========================================
+# 🔍 DEBUGGING SYSTEM FÜR HF SPACES
+# ==========================================
+import time
+import psutil
+import datetime
class SimpleDebugger:
    """Print timestamped checkpoints with elapsed time and RAM usage.

    Elapsed time is measured from the moment the instance is constructed.
    All output goes to stdout via ``print``; the message texts are German
    and are kept exactly as the application emits them.
    """

    # Elapsed-time thresholds, in seconds since construction, above which
    # log() appends a notice to the checkpoint.
    WARN_AFTER_SECONDS = 60
    VERY_SLOW_AFTER_SECONDS = 300  # 5 minutes

    def __init__(self):
        """Record the start time and print a banner with total and free RAM."""
        self.start_time = time.time()
        print("=" * 60)
        print("🔍 HF SPACES DEBUGGING SYSTEM GESTARTET")
        print(f"🕐 Start Zeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
        print("=" * 60)
        # System info
        memory = psutil.virtual_memory()
        print(f"💾 RAM Total: {memory.total / 1024**3:.1f}GB")
        print(f"💾 RAM Free: {memory.available / 1024**3:.1f}GB")
        print("=" * 60)

    def log(self, message, details=None):
        """Print a checkpoint line with timing and memory information.

        Args:
            message: Text of the checkpoint.
            details: Optional extra text printed on its own line; omitted
                when falsy.
        """
        elapsed = time.time() - self.start_time
        timestamp = datetime.datetime.now().strftime('%H:%M:%S')
        try:
            memory = psutil.virtual_memory()
            memory_pct = memory.percent
            memory_free = memory.available / 1024**3
        except Exception:
            # Memory figures are best-effort diagnostics: fall back to zeros
            # rather than failing the checkpoint. Catching Exception (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            memory_pct = 0
            memory_free = 0
        print(f"\n🕐 [{timestamp}] {message}")
        print(f"   ⏱️  Nach {elapsed:.1f}s | 💾 RAM: {memory_pct:.1f}% ({memory_free:.1f}GB frei)")
        if details:
            print(f"   📋 {details}")
        # Warn about slow operations. The larger threshold must be tested
        # first; otherwise the smaller one matches every value above it and
        # the "very slow" notice can never be printed.
        if elapsed > self.VERY_SLOW_AFTER_SECONDS:
            print(f"   🚨 SEHR LANGSAM: {elapsed:.1f}s - Das ist ungewöhnlich lang!")
        elif elapsed > self.WARN_AFTER_SECONDS:
            print(f"   ⚠️  WARNUNG: Schon {elapsed:.1f}s vergangen!")
+# Debugger initialisieren
+debug = SimpleDebugger()
+# ==========================================
+# IHR BESTEHENDER CODE MIT DEBUG-LOGS
+# ==========================================
+debug.log("Starte Python Imports...")
 import os
 import sys
 import gc
+debug.log("Basic Python imports fertig")
 import cv2
 import torch
 import numpy as np
+debug.log("OpenCV, PyTorch, NumPy imports fertig")
 import gradio as gr
+debug.log("Gradio importiert")
 import subprocess
 import requests
 from urllib.parse import urlparse
+debug.log("Network-Module importiert")
+debug.log("Starte HuggingFace Hub Import...")
 from huggingface_hub import hf_hub_download
+debug.log("HuggingFace Hub importiert")
+debug.log("Starte Video Depth Anything Import (kann hängen wenn Module fehlen)...")
# Import the project-local depth modules. A failure is logged for the Space
# console and then re-raised: VideoDepthAnything is used unconditionally when
# the depth model is built further down, so continuing would only trade the
# real import error for a later NameError.
try:
    from video_depth_anything.video_depth import VideoDepthAnything
    from utils.dc_utils import read_video_frames, save_video
except Exception as e:
    debug.log("❌ Video Depth Anything Import FEHLER", str(e))
    raise
else:
    debug.log("✅ Video Depth Anything Module erfolgreich importiert")
+debug.log("Starte Transformers Import (erstes kritisches Modul)...")
 from transformers import BlipProcessor, BlipForConditionalGeneration
+debug.log("✅ Transformers erfolgreich importiert")
 from PIL import Image
+debug.log("Alle Imports abgeschlossen")
 # --- Environment setup ---
+debug.log("Environment Variablen werden gesetzt...")
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
 os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
+debug.log("Environment setup fertig")
 # --- Patch Gradio schema bug ---
+debug.log("Gradio Utils werden gepatcht...")
 def patch_gradio_utils():
     """Fix Gradio schema type checking bug"""
     try:
             return original_get_type(schema)
         utils.get_type = patched_get_type
+        debug.log("✅ Gradio utils erfolgreich gepatcht")
     except Exception as e:
+        debug.log("❌ Gradio utils patching FEHLER", str(e))
 patch_gradio_utils()
 # --- Load BLIP model ---
+debug.log("🔥 KRITISCH: BLIP Model Loading startet - das ist oft der langsamste Teil!")
+debug.log("BLIP Processor Download/Load startet...")
 print("Loading BLIP model...")
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+debug.log("✅ BLIP Processor geladen")
+debug.log("BLIP Model Download/Load startet - das dauert oft sehr lange...")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cpu")
+debug.log("✅ BLIP Model geladen und auf CPU verschoben")
 def get_first_frame_for_blip(video_path, target_size=480):
     """Effizient: Lädt nur das erste Frame für BLIP (nicht alle Frames!)"""
         return False
 # --- Load depth model ---
+debug.log("🔥 KRITISCH: Video Depth Anything Model Loading startet!")
+debug.log("Device wird ermittelt...")
 print("Loading Video Depth Anything model...")
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+debug.log(f"Device ausgewählt: {DEVICE}")
 encoder = 'vitl'
 model_name = 'Large'
 model_configs = {
     'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
 }
+debug.log("VideoDepthAnything Instanz wird erstellt...")
 video_depth_anything = VideoDepthAnything(**model_configs[encoder])
+debug.log("✅ VideoDepthAnything Instanz erstellt")
+debug.log("🔥 KRITISCH: Model Checkpoint Download startet - das kann sehr lange dauern!")
 ckpt_path = hf_hub_download(repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
                             filename=f"video_depth_anything_{encoder}.pth",
                             cache_dir="/tmp/huggingface")
+debug.log("✅ Model Checkpoint heruntergeladen", f"Pfad: {ckpt_path}")
+debug.log("Model Weights werden geladen...")
 video_depth_anything.load_state_dict(torch.load(ckpt_path, map_location='cpu'))
+debug.log("✅ Model Weights geladen")
+debug.log("Model wird auf Device verschoben und in Eval-Modus gesetzt...")
 video_depth_anything = video_depth_anything.to(DEVICE).eval()
+debug.log("✅ Video Depth Anything Model komplett bereit!")
 # --- URL validation and download ---
 def validate_url(url):
         raise RuntimeError("yt-dlp Python module not installed. Install with: pip install yt-dlp")
     except Exception as e:
         raise RuntimeError(f"Failed to download with yt-dlp: {e}")
 def detect_video_source(url):
     """Detect video source and determine download method"""
     # Known platforms with special handling (priority check first)