Kevin King committed on
Commit 377a152 · 1 Parent(s): b2395f1

Refactor video processing logic to improve error handling and resource cleanup in Streamlit app

Files changed (1)
  1. src/streamlit_app.py +34 -41
src/streamlit_app.py CHANGED
@@ -8,25 +8,21 @@ from deepface import DeepFace
 import logging
 import soundfile as sf
 import tempfile
-from PIL import Image
 import cv2
 from moviepy.editor import VideoFileClip
+import time
 
-# --- THIS IS THE FIX for Error 1 ---
+# --- THIS IS THE FIX for Error 1 (Local Pathing) ---
 # Create a cross-platform, writable cache directory for all libraries
+# On Windows, this will be in AppData\Local\Temp. On Linux (HF Spaces), it will be in /tmp
 CACHE_DIR = os.path.join(tempfile.gettempdir(), "affectlink_cache")
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.environ['DEEPFACE_HOME'] = CACHE_DIR
 os.environ['HF_HOME'] = CACHE_DIR
-# ====================================
+# ===================================================
 
 # --- Page Configuration ---
-st.set_page_config(
-    page_title="AffectLink Demo",
-    page_icon="😊",
-    layout="wide"
-)
-
+st.set_page_config(page_title="AffectLink Demo", page_icon="😊", layout="wide")
 st.title("AffectLink: Post-Hoc Emotion Analysis")
 st.write("Upload a short video clip (under 30 seconds) to analyze facial expressions, speech-to-text, and the emotional tone of the audio.")
 
@@ -38,13 +34,8 @@ logging.getLogger('moviepy').setLevel(logging.ERROR)
 
 # --- Emotion Mappings ---
 UNIFIED_EMOTIONS = ['neutral', 'happy', 'sad', 'angry']
-TEXT_TO_UNIFIED = {
-    'neutral': 'neutral', 'joy': 'happy', 'sadness': 'sad', 'anger': 'angry',
-    'fear': None, 'surprise': None, 'disgust': None
-}
-SER_TO_UNIFIED = {
-    'neu': 'neutral', 'hap': 'happy', 'sad': 'sad', 'ang': 'angry'
-}
+TEXT_TO_UNIFIED = {'neutral': 'neutral', 'joy': 'happy', 'sadness': 'sad', 'anger': 'angry'}
+SER_TO_UNIFIED = {'neu': 'neutral', 'hap': 'happy', 'sad': 'sad', 'ang': 'angry'}
 AUDIO_SAMPLE_RATE = 16000
 
 # --- Model Loading ---
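Note on the simplified mappings above: dict.get() already returns None for labels that are not listed (e.g. 'fear', 'surprise', 'disgust'), so those labels are still skipped by the `if unified_emo:` checks later in the file. A minimal, self-contained sketch of that collapsing step (the helper name to_unified and the sample scores are illustrative, not from the commit):

# Collapse classifier label/score pairs into the four unified emotions.
UNIFIED_EMOTIONS = ['neutral', 'happy', 'sad', 'angry']
TEXT_TO_UNIFIED = {'neutral': 'neutral', 'joy': 'happy', 'sadness': 'sad', 'anger': 'angry'}

def to_unified(label_scores):
    totals = {e: 0.0 for e in UNIFIED_EMOTIONS}
    for label, score in label_scores:
        unified = TEXT_TO_UNIFIED.get(label)  # None for unmapped labels such as 'fear'
        if unified:
            totals[unified] += score
    return max(totals, key=totals.get)

print(to_unified([('joy', 0.6), ('fear', 0.3), ('neutral', 0.1)]))  # -> 'happy'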
@@ -75,20 +66,17 @@ if uploaded_file is not None:
     if st.button("Analyze Video"):
         facial_analysis_results = []
         audio_analysis_results = {}
-        cap = None # Initialize cap to None
-
+
         # --- Video Processing ---
         with st.spinner("Analyzing video for facial expressions... (1 frame per second)"):
+            cap = None
             try:
                 cap = cv2.VideoCapture(temp_video_path)
-                fps = cap.get(cv2.CAP_PROP_FPS)
-                if fps == 0:
-                    fps = 30
+                fps = cap.get(cv2.CAP_PROP_FPS) or 30
                 frame_count = 0
                 while cap.isOpened():
                     ret, frame = cap.read()
-                    if not ret:
-                        break
+                    if not ret: break
                     if frame_count % int(fps) == 0:
                         timestamp = frame_count / fps
                         analysis = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False, silent=True)
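Note on the fps handling above: OpenCV can report 0 for CAP_PROP_FPS with some containers or backends, so `or 30` keeps the once-per-second sampling (`frame_count % int(fps)`) from dividing or modding by zero. A standalone sketch of the same sampling pattern (the generator wrapper is illustrative, not from the commit):

import cv2

def sample_one_frame_per_second(video_path):
    # Yield (timestamp_seconds, frame) roughly once per second of video.
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS) or 30  # some backends report 0; fall back to 30
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % int(fps) == 0:
                yield frame_count / fps, frame
            frame_count += 1
    finally:
        cap.release()  # always free the capture handle, mirroring the finally block above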
@@ -98,12 +86,12 @@ if uploaded_file is not None:
             except Exception as e:
                 st.error(f"An error occurred during facial analysis: {e}")
             finally:
-                if cap:
-                    cap.release() # --- THIS IS PART of the FIX for Error 3 ---
+                if cap: cap.release()
 
         # --- Audio Processing ---
         with st.spinner("Extracting and analyzing audio..."):
             video_clip = None
+            temp_audio_path = None
             try:
                 video_clip = VideoFileClip(temp_video_path)
                 if video_clip.audio:
@@ -112,7 +100,7 @@ if uploaded_file is not None:
                         temp_audio_path = taudio.name
 
                     result = whisper_model.transcribe(temp_audio_path, fp16=False)
-                    transcribed_text = result['text'] if result['text'] else "No speech detected."
+                    transcribed_text = result['text'] if result['text'].strip() else "No speech detected."
                     audio_analysis_results['Transcription'] = transcribed_text
 
                     if "No speech detected" not in transcribed_text:
@@ -120,18 +108,17 @@ if uploaded_file is not None:
                         unified_text_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
                         for emo in text_emotions:
                             unified_emo = TEXT_TO_UNIFIED.get(emo['label'])
-                            if unified_emo:
-                                unified_text_scores[unified_emo] += emo['score']
+                            if unified_emo: unified_text_scores[unified_emo] += emo['score']
                         audio_analysis_results['Text Emotion'] = max(unified_text_scores, key=unified_text_scores.get).capitalize()
 
                         audio_array, _ = sf.read(temp_audio_path)
 
-                        # --- THIS IS THE FIX for Error 2 ---
-                        min_length = 400 # Minimum samples for the SER model
+                        # --- FIX for Error 2 ---
+                        min_length = 400
                         if len(audio_array) < min_length:
-                            padding = np.zeros(min_length - len(audio_array))
+                            padding = np.zeros(min_length - len(audio_array), dtype=np.float32)
                             audio_array = np.concatenate([audio_array, padding])
-                        # =====================================
+                        # ======================
 
                         inputs = ser_feature_extractor(audio_array, sampling_rate=AUDIO_SAMPLE_RATE, return_tensors="pt", padding=True)
                         with torch.no_grad():
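Note on the padding fix above: wav2vec2-style feature extractors typically need a minimum number of raw samples to produce even one output frame, so very short clips (under 400 samples, i.e. 25 ms at 16 kHz) are right-padded with zeros before inference. A minimal sketch of that step, assuming float32 mono audio (the helper name is illustrative, not from the commit):

import numpy as np

MIN_SAMPLES = 400  # minimum the SER model will accept (25 ms at 16 kHz)

def pad_to_min_length(audio_array, min_length=MIN_SAMPLES):
    # Right-pad a short waveform with zeros so the feature extractor gets enough samples.
    if len(audio_array) < min_length:
        padding = np.zeros(min_length - len(audio_array), dtype=np.float32)
        audio_array = np.concatenate([audio_array, padding])
    return audio_array

print(len(pad_to_min_length(np.zeros(100, dtype=np.float32))))  # -> 400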
@@ -141,18 +128,16 @@ if uploaded_file is not None:
                         for i, score in enumerate(scores):
                             raw_emo = ser_model.config.id2label[i]
                             unified_emo = SER_TO_UNIFIED.get(raw_emo)
-                            if unified_emo:
-                                unified_ser_scores[unified_emo] += score.item()
+                            if unified_emo: unified_ser_scores[unified_emo] += score.item()
                         audio_analysis_results['Speech Emotion'] = max(unified_ser_scores, key=unified_ser_scores.get).capitalize()
-                        os.unlink(temp_audio_path)
                 else:
                     audio_analysis_results['Transcription'] = "No audio track found in the video."
 
             except Exception as e:
                 st.error(f"An error occurred during audio analysis: {e}")
             finally:
-                if video_clip:
-                    video_clip.close() # --- THIS IS PART of the FIX for Error 3 ---
+                if video_clip: video_clip.close()
+                if temp_audio_path and os.path.exists(temp_audio_path): os.unlink(temp_audio_path)
 
         # --- Display Results ---
         st.header("Analysis Results")
@@ -169,9 +154,17 @@ if uploaded_file is not None:
                     st.write(f"**Time {int(timestamp // 60):02d}:{int(timestamp % 60):02d}:** {emotion}")
             else:
                 st.write("No faces detected or video processing failed.")
-
+
     finally:
-        # --- THIS IS THE FINAL PART of the FIX for Error 3 ---
-        # Clean up the temporary video file in a finally block to ensure it runs
+        # --- FIX for Error 3 ---
+        # Ensure the video file is released before attempting to delete it
+        # This block runs after the 'Analyze Video' button logic completes
        if temp_video_path and os.path.exists(temp_video_path):
+            # A small delay can sometimes help ensure file locks are released
+            time.sleep(1)
+            try:
+                os.unlink(temp_video_path)
+            except PermissionError:
+                st.warning(f"Could not delete temporary video file. It may still be in use: {temp_video_path}")
+            except Exception as e:
+                st.warning(f"An error occurred while deleting the temporary video file: {e}")
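Note on the cleanup above: on Windows, os.unlink raises PermissionError while an unclosed handle (an OpenCV capture or MoviePy clip) still locks the file, which is why the commit releases those handles in finally blocks first, waits briefly, and downgrades a failed delete to st.warning instead of crashing the app. A standalone sketch of the same idea as a reusable helper (the name and retry count are illustrative, not from the commit):

import os
import time

def safe_unlink(path, retries=3, delay=1.0):
    # Try to delete a temp file, tolerating transient Windows file locks.
    for _ in range(retries):
        try:
            os.unlink(path)
            return True
        except FileNotFoundError:
            return True  # already gone
        except PermissionError:
            time.sleep(delay)  # give the OS a moment to release the handle
    return False  # caller can log a warning instead of raising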