Kevin King committed
Commit d5ac657 · Parent: e83cd54

REFAC: Improve emotion mapping and display logic in Streamlit app

Files changed (1): src/streamlit_app.py (+24 -15)

src/streamlit_app.py CHANGED
@@ -26,7 +26,6 @@ st.title("AffectLink: Post-Hoc Emotion Analysis")
 st.write("Upload a short video clip (under 30 seconds) to see a multimodal emotion analysis.")
 
 # --- Logger Configuration ---
-# [Logger setup remains the same]
 logging.basicConfig(level=logging.INFO)
 logging.getLogger('deepface').setLevel(logging.ERROR)
 logging.getLogger('huggingface_hub').setLevel(logging.WARNING)
@@ -34,10 +33,11 @@ logging.getLogger('moviepy').setLevel(logging.ERROR)
 
 
 # --- Emotion Mappings ---
-UNIFIED_EMOTIONS = ['angry', 'happy', 'sad', 'neutral'] # Defined order for vectors
+# This is the single source of truth for our final emotion space
+UNIFIED_EMOTIONS = ['angry', 'happy', 'sad', 'neutral']
 TEXT_TO_UNIFIED = {'neutral': 'neutral', 'joy': 'happy', 'sadness': 'sad', 'anger': 'angry'}
 SER_TO_UNIFIED = {'neu': 'neutral', 'hap': 'happy', 'sad': 'sad', 'ang': 'angry'}
-FACIAL_TO_UNIFIED = {'neutral': 'neutral', 'happy': 'happy', 'sad': 'sad', 'angry': 'angry'}
+FACIAL_TO_UNIFIED = {'neutral': 'neutral', 'happy': 'happy', 'sad': 'sad', 'angry': 'angry', 'fear': None, 'surprise': None, 'disgust': None}
 AUDIO_SAMPLE_RATE = 16000
 
 # --- Model Loading ---
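The explicit None entries do real work here: they record DeepFace's full seven-label output while marking which labels fall outside the unified space. A minimal standalone sketch (hypothetical snippet, mirroring the plot filter used further down in this diff):

FACIAL_TO_UNIFIED = {'neutral': 'neutral', 'happy': 'happy', 'sad': 'sad',
                     'angry': 'angry', 'fear': None, 'surprise': None, 'disgust': None}

# Labels mapped to None are known-but-excluded, so the plottable columns can
# be derived from the mapping itself instead of being hard-coded elsewhere.
plot_columns = [k for k, v in FACIAL_TO_UNIFIED.items() if v is not None]
print(plot_columns)  # ['neutral', 'happy', 'sad', 'angry']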
@@ -58,12 +58,12 @@ def create_unified_vector(scores_dict, mapping_dict):
     """Creates a normalized vector from a dictionary of scores based on a mapping."""
     vector = np.zeros(len(UNIFIED_EMOTIONS))
     for label, score in scores_dict.items():
+        # Map the raw label (e.g., 'neu', 'joy') to our unified label ('neutral', 'happy')
         unified_label = mapping_dict.get(label)
-        if unified_label and unified_label in UNIFIED_EMOTIONS:
+        if unified_label in UNIFIED_EMOTIONS:
             idx = UNIFIED_EMOTIONS.index(unified_label)
             vector[idx] += score
 
-    # Normalize the vector
     norm = np.linalg.norm(vector)
     if norm > 0:
         vector /= norm
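To see why the simplified membership check is safe: labels that map to None (or are missing from the mapping entirely) fail `unified_label in UNIFIED_EMOTIONS` and are dropped before `list.index` is ever called. A runnable sketch with hypothetical DeepFace-style percentage scores:

import numpy as np

UNIFIED_EMOTIONS = ['angry', 'happy', 'sad', 'neutral']
FACIAL_TO_UNIFIED = {'neutral': 'neutral', 'happy': 'happy', 'sad': 'sad',
                     'angry': 'angry', 'fear': None, 'surprise': None, 'disgust': None}

def create_unified_vector(scores_dict, mapping_dict):
    vector = np.zeros(len(UNIFIED_EMOTIONS))
    for label, score in scores_dict.items():
        unified_label = mapping_dict.get(label)
        if unified_label in UNIFIED_EMOTIONS:  # None is never in the list
            vector[UNIFIED_EMOTIONS.index(unified_label)] += score
    norm = np.linalg.norm(vector)
    return vector / norm if norm > 0 else vector

# 'fear' maps to None and is silently excluded; the rest is L2-normalized.
print(create_unified_vector({'happy': 80.0, 'fear': 15.0, 'neutral': 5.0},
                            FACIAL_TO_UNIFIED))
# -> approx. [0.    0.998 0.    0.062]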
@@ -107,7 +107,6 @@ if uploaded_file is not None:
             timestamp = frame_count / fps
             analysis = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False, silent=True)
             if isinstance(analysis, list) and len(analysis) > 0:
-                # Store the full emotion dictionary for the plot
                 fer_timeline[timestamp] = analysis[0]['emotion']
             frame_count += 1
     finally:
@@ -123,7 +122,6 @@ if uploaded_file is not None:
         video_clip.audio.write_audiofile(taudio.name, fps=AUDIO_SAMPLE_RATE, logger=None)
         temp_audio_path = taudio.name
 
-    # Run all audio models
     result = whisper_model.transcribe(temp_audio_path, fp16=False)
     transcribed_text = result['text'].strip()
     audio_analysis_results['Transcription'] = transcribed_text if transcribed_text else "No speech detected."
@@ -156,7 +154,7 @@ if uploaded_file is not None:
     ser_scores = audio_analysis_results.get('Speech Emotion Scores', {})
     text_scores = audio_analysis_results.get('Text Emotion Scores', {})
 
-    # Create vectors for cosine similarity
+    # Create vectors using the unified mappings. This ensures cosine similarity is correct.
     fer_vector = create_unified_vector(fer_avg_scores, FACIAL_TO_UNIFIED)
     ser_vector = create_unified_vector(ser_scores, SER_TO_UNIFIED)
     text_vector = create_unified_vector(text_scores, TEXT_TO_UNIFIED)
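The new comment is the crux of the refactor: cosine similarity between modalities is only meaningful once all three score dictionaries are projected into the same four-dimensional space. A sketch with hypothetical, mostly-'happy' vectors, assuming scikit-learn's `cosine_similarity`:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical unified vectors, ordered [angry, happy, sad, neutral]
fer_vector = np.array([0.00, 0.99, 0.00, 0.12])
ser_vector = np.array([0.05, 0.90, 0.10, 0.42])
text_vector = np.array([0.00, 1.00, 0.00, 0.00])

pairs = [(fer_vector, text_vector), (fer_vector, ser_vector), (ser_vector, text_vector)]
sims = [cosine_similarity([a], [b])[0][0] for a, b in pairs]
print(np.mean(sims))  # approx. 0.95 -- high cross-modal consistency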
@@ -167,24 +165,35 @@ if uploaded_file is not None:
     sim_speech_text = cosine_similarity([ser_vector], [text_vector])[0][0]
     avg_similarity = np.mean([sim_face_text, sim_face_speech, sim_speech_text])
 
+    # --- THIS IS THE FIX: Map dominant emotions to unified labels before displaying ---
+    dominant_fer = max(fer_avg_scores, key=fer_avg_scores.get) if fer_avg_scores else "N/A"
+    dominant_text_raw = max(text_scores, key=text_scores.get) if text_scores else "N/A"
+    dominant_ser_raw = max(ser_scores, key=ser_scores.get) if ser_scores else "N/A"
+
+    # Convert raw dominant emotions to their unified, full-word versions for display
+    display_fer = (FACIAL_TO_UNIFIED.get(dominant_fer) or "N/A").capitalize()
+    display_text = (TEXT_TO_UNIFIED.get(dominant_text_raw) or "N/A").capitalize()
+    display_ser = (SER_TO_UNIFIED.get(dominant_ser_raw) or "N/A").capitalize()
+    # ===================================================================================
+
     # Display metrics
     col1, col2 = st.columns([1, 2])
     with col1:
         st.subheader("Multimodal Summary")
         st.write(f"**Transcription:** \"{audio_analysis_results.get('Transcription', 'N/A')}\"")
-        st.metric("Dominant Facial Emotion", max(fer_avg_scores, key=fer_avg_scores.get).capitalize() if fer_avg_scores else "N/A")
-        st.metric("Dominant Text Emotion", max(text_scores, key=lambda k: TEXT_TO_UNIFIED.get(k) is not None and text_scores.get(k) or -1).capitalize() if text_scores else "N/A")
-        st.metric("Dominant Speech Emotion", max(ser_scores, key=lambda k: SER_TO_UNIFIED.get(k) is not None and ser_scores.get(k) or -1).capitalize() if ser_scores else "N/A")
+        st.metric("Dominant Facial Emotion", display_fer)
+        st.metric("Dominant Text Emotion", display_text)
+        st.metric("Dominant Speech Emotion", display_ser)
         st.metric("Emotion Consistency", get_consistency_level(avg_similarity), f"{avg_similarity:.2f} Avg. Cosine Similarity")
 
     with col2:
         st.subheader("Facial Emotion Over Time")
         if fer_timeline:
-            # Convert timeline to a DataFrame suitable for st.line_chart
             df = pd.DataFrame(fer_timeline).T
-            # Filter for only the unified emotions we care about
-            df_filtered = df[list(FACIAL_TO_UNIFIED.keys())].rename(columns=FACIAL_TO_UNIFIED)
-            st.line_chart(df_filtered)
+            # Filter for only the unified emotions we care about for the plot
+            plot_columns = [k for k, v in FACIAL_TO_UNIFIED.items() if v is not None]
+            df_filtered = df[plot_columns].rename(columns=FACIAL_TO_UNIFIED)
+            st.line_chart(df_filtered[UNIFIED_EMOTIONS])  # Ensure consistent column order
         else:
             st.write("No faces detected to plot.")
 
 
 
199