Krokodilpirat committed on
Commit
41131e3
·
verified ·
1 Parent(s): b77d16c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -2
app.py CHANGED
@@ -45,8 +45,56 @@ print("Loading BLIP model...")
45
  blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
46
  blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cpu")
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def generate_blip_name(frame: np.ndarray) -> str:
49
- """Generate filename from frame using BLIP image captioning"""
50
  try:
51
  # Check if frame is valid
52
  if frame is None or frame.size == 0:
@@ -57,11 +105,23 @@ def generate_blip_name(frame: np.ndarray) -> str:
57
  out = blip_model.generate(**inputs)
58
  caption = blip_processor.decode(out[0], skip_special_tokens=True).lower()
59
 
 
 
60
  # Remove common stopwords and create filename
61
  stopwords = {"a", "an", "the", "in", "on", "at", "with", "by", "of", "for", "under", "through", "and", "is"}
62
  words = [w for w in caption.split() if w not in stopwords and w.isalpha()]
 
 
 
 
 
 
63
  trimmed = "_".join(words[:3])
64
- return trimmed[:30] if trimmed else "video"
 
 
 
 
65
  except Exception as e:
66
  print(f"BLIP error: {e}")
67
  return "video"
 
45
  blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
46
  blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cpu")
47
 
48
def get_middle_frame_for_blip(video_path, target_size=480):
    """Load only the middle frame of a video as a downscaled RGB image for BLIP.

    Seeks directly to the middle frame instead of decoding every frame, so the
    cost does not grow with the length of the video.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        target_size: Maximum length in pixels of the longer frame side. Larger
            frames are scaled down with the aspect ratio preserved; smaller
            frames are returned at their original size. Must be positive.

    Returns:
        The middle frame converted from BGR to RGB, or ``None`` when the video
        cannot be opened, reports no frames, the frame cannot be read, or any
        other error occurs. This function never raises; failures are printed.
    """
    cap = None
    try:
        if target_size <= 0:
            # Fail early with a clear message instead of an opaque resize error.
            raise ValueError(f"target_size must be positive, got {target_size}")

        cap = cv2.VideoCapture(video_path)

        # Make sure the video could actually be opened.
        if not cap.isOpened():
            print(f"DEBUG: Could not open video: {video_path}")
            return None

        # Read the frame count and pick the middle frame.
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            print(f"DEBUG: Invalid frame count: {frame_count}")
            return None

        middle_idx = frame_count // 2
        print(f"DEBUG: Video has {frame_count} frames, jumping to frame {middle_idx}")

        # Seek straight to the target frame (no iteration over earlier frames).
        cap.set(cv2.CAP_PROP_POS_FRAMES, middle_idx)
        ret, frame = cap.read()

        if not ret or frame is None:
            print("DEBUG: Could not read middle frame")
            return None

        # Downscale only this single frame.
        h, w = frame.shape[:2]
        longest_side = max(h, w)
        if longest_side > target_size:
            scale = target_size / longest_side
            # Clamp to 1 px: truncation could otherwise give a zero dimension
            # for extreme aspect ratios, which cv2.resize rejects.
            new_h = max(1, int(h * scale))
            new_w = max(1, int(w * scale))
            # INTER_AREA is the interpolation OpenCV recommends for shrinking.
            frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
            print(f"DEBUG: Resized frame from {w}x{h} to {new_w}x{new_h}")
        else:
            print(f"DEBUG: Frame size {w}x{h} already within target {target_size}")

        # OpenCV decodes to BGR; BLIP expects RGB.
        return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    except Exception as e:
        # Best-effort helper: report the problem and let the caller fall back.
        print(f"DEBUG: get_middle_frame_for_blip error: {e}")
        return None
    finally:
        # Always free the capture handle, including on unexpected errors.
        if cap is not None:
            cap.release()
95
+
96
  def generate_blip_name(frame: np.ndarray) -> str:
97
+ """Generate filename from frame using BLIP image captioning + Duplikat-Entfernung"""
98
  try:
99
  # Check if frame is valid
100
  if frame is None or frame.size == 0:
 
105
  out = blip_model.generate(**inputs)
106
  caption = blip_processor.decode(out[0], skip_special_tokens=True).lower()
107
 
108
+ print(f"DEBUG: BLIP caption: '{caption}'")
109
+
110
  # Remove common stopwords and create filename
111
  stopwords = {"a", "an", "the", "in", "on", "at", "with", "by", "of", "for", "under", "through", "and", "is"}
112
  words = [w for w in caption.split() if w not in stopwords and w.isalpha()]
113
+
114
+ # 🎯 NEUE OPTIMIERUNG: Entferne Duplikate, behalte Reihenfolge
115
+ words = list(dict.fromkeys(words))
116
+
117
+ print(f"DEBUG: Words after stopword removal and deduplication: {words}")
118
+
119
  trimmed = "_".join(words[:3])
120
+ result = trimmed[:30] if trimmed else "video"
121
+
122
+ print(f"DEBUG: Final BLIP name: '{result}'")
123
+ return result
124
+
125
  except Exception as e:
126
  print(f"BLIP error: {e}")
127
  return "video"