Archime committed on
Commit
6f523af
·
1 Parent(s): 1dd92bc

impl walkthrough

Browse files
.gitignore CHANGED
@@ -2,4 +2,4 @@
2
  .vscode/settings.json
3
  app/__pycache__/
4
  __pycache__/
5
- logs/app.log
 
2
  .vscode/settings.json
3
  app/__pycache__/
4
  __pycache__/
5
+ logs/
app.py CHANGED
@@ -1,222 +1,309 @@
1
- from app.logger_config import logger as logging
 
 
 
2
  import numpy as np
3
  import gradio as gr
4
  import asyncio
5
  from fastrtc.webrtc import WebRTC
 
6
  from pydub import AudioSegment
7
  import time
8
- import os
9
  from gradio.utils import get_space
10
 
11
- from app.logger_config import logger as logging
12
  from app.utils import (
13
- generate_coturn_config
 
14
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- EXAMPLE_FILES = ["data/bonjour.wav", "data/bonjour2.wav"]
17
- DEFAULT_FILE = EXAMPLE_FILES[0]
18
- # UtilisΓ© pour signaler l'arrΓͺt du streaming Γ  l'intΓ©rieur du gΓ©nΓ©rateur
19
- stop_stream_state = gr.State(value=False)
20
-
21
- def read_and_stream_audio(filepath_to_stream: str):
22
- """
23
- Un gΓ©nΓ©rateur synchrone qui lit un fichier audio (via filepath_to_stream)
24
- et le streame chunk par chunk d'1 seconde.
25
- """
26
-
27
- if not filepath_to_stream or not os.path.exists(filepath_to_stream):
28
- logging.error(f"Fichier audio non trouvΓ© ou non spΓ©cifiΓ© : {filepath_to_stream}")
29
- # Tenter d'utiliser le fichier par défaut en cas de problème
30
- if os.path.exists(DEFAULT_FILE):
31
- logging.warning(f"Utilisation du fichier par dΓ©faut : {DEFAULT_FILE}")
32
- filepath_to_stream = DEFAULT_FILE
33
- else:
34
- logging.error("Fichier par dΓ©faut non trouvΓ©. ArrΓͺt du stream.")
35
- return
36
-
37
- logging.info(f"PrΓ©paration du segment audio depuis : {filepath_to_stream}")
38
- # RΓ©initialiser le signal d'arrΓͺt Γ  chaque lancement
39
- stop_stream_state.value = False
40
-
41
- try:
42
- segment = AudioSegment.from_file(filepath_to_stream)
43
- chunk_duree_ms = 1000
44
- logging.info(f"DΓ©but du streaming en chunks de {chunk_duree_ms}ms...")
45
-
46
- for i, chunk in enumerate(segment[::chunk_duree_ms]):
47
- iter_start_time = time.perf_counter()
48
- logging.info(f"Envoi du chunk {i+1}...")
49
-
50
- if stop_stream_state.value:
51
- logging.info("Signal d'arrΓͺt reΓ§u, arrΓͺt de la boucle.")
52
- break
53
-
54
- output_chunk = (
55
- chunk.frame_rate,
56
- np.array(chunk.get_array_of_samples()).reshape(1, -1),
57
  )
58
 
59
- yield output_chunk
60
-
61
- iter_end_time = time.perf_counter()
62
- processing_duration_ms = (iter_end_time - iter_start_time) * 1000
63
-
64
- sleep_duration = (chunk_duree_ms / 1000.0) - (processing_duration_ms / 1000.0) - 0.1
65
- if sleep_duration < 0:
66
- sleep_duration = 0.01 # Γ‰viter un temps de sommeil nΓ©gatif
67
-
68
- logging.debug(f"Temps de traitement: {processing_duration_ms:.2f}ms, Sommeil: {sleep_duration:.2f}s")
69
-
70
- elapsed = 0.0
71
- interval = 0.05
72
- while elapsed < sleep_duration:
73
- if stop_stream_state.value:
74
- logging.info("Signal d'arrΓͺt reΓ§u pendant l'attente.")
75
- break
76
- wait_chunk = min(interval, sleep_duration - elapsed)
77
- time.sleep(wait_chunk)
78
- elapsed += wait_chunk
79
- if stop_stream_state.value:
80
- break
81
-
82
- logging.info("Streaming terminΓ©.")
83
-
84
- except asyncio.CancelledError:
85
- logging.info("Stream arrΓͺtΓ© par l'utilisateur (CancelledError).")
86
- raise
87
- except FileNotFoundError:
88
- logging.error(f"Erreur critique : Fichier non trouvΓ© : {filepath_to_stream}")
89
- except Exception as e:
90
- logging.error(f"Erreur pendant le stream: {e}", exc_info=True)
91
- raise
92
- finally:
93
- stop_stream_state.value = False
94
- logging.info("Signal d'arrΓͺt nettoyΓ©.")
95
-
96
-
97
- def stop_streaming():
98
- """Active le signal d'arrΓͺt pour le gΓ©nΓ©rateur."""
99
- logging.info("Bouton Stop cliquΓ©: envoi du signal d'arrΓͺt.")
100
- stop_stream_state.value = True
101
- return None
102
-
103
- # --- Interface Gradio ---
104
-
105
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
106
- gr.Markdown(
107
- "## Application 'Streamer' WebRTC (Serveur -> Client)\n"
108
- "Utilisez l'exemple fourni, uploadez un fichier ou enregistrez depuis votre micro, "
109
- "puis cliquez sur 'Start' pour Γ©couter le stream."
110
- )
111
-
112
- # 1. Γ‰tat pour stocker le chemin du fichier Γ  lire
113
- active_filepath = gr.State(value=DEFAULT_FILE)
114
-
115
- with gr.Row():
116
- with gr.Column():
117
- main_audio = gr.Audio(
118
- label="Source Audio",
119
- sources=["upload", "microphone"], # Combine les deux sources
120
- type="filepath",
121
- value=DEFAULT_FILE, # DΓ©faut au premier exemple
122
  )
123
- with gr.Column():
124
- webrtc_stream = WebRTC(
125
- label="Stream Audio",
126
- mode="receive",
127
- modality="audio",
128
- rtc_configuration=generate_coturn_config(),
129
- visible=True, # CachΓ© par dΓ©faut
130
- height = 200,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  )
132
- # 4. Boutons de contrΓ΄le
133
- with gr.Row():
134
- with gr.Column():
135
- start_button = gr.Button("Start Streaming", variant="primary")
136
- stop_button = gr.Button("Stop Streaming", variant="stop", interactive=False)
137
- with gr.Column():
138
- gr.Text()
139
-
140
- def set_new_file(filepath):
141
- """Met Γ  jour l'Γ©tat avec le nouveau chemin, ou revient au dΓ©faut si None."""
142
- if filepath is None:
143
- logging.info("Audio effacΓ©, retour au fichier d'exemple par dΓ©faut.")
144
- new_path = DEFAULT_FILE
145
- else:
146
- logging.info(f"Nouvelle source audio sΓ©lectionnΓ©e : {filepath}")
147
- new_path = filepath
148
- # Retourne la valeur Γ  mettre dans le gr.State
149
- return new_path
150
-
151
- # Mettre Γ  jour le chemin si l'utilisateur upload, efface, ou change le fichier
152
- main_audio.change(
153
- fn=set_new_file,
154
- inputs=[main_audio],
155
- outputs=[active_filepath]
156
- )
157
-
158
- # Mettre Γ  jour le chemin si l'utilisateur termine un enregistrement
159
- main_audio.stop_recording(
160
- fn=set_new_file,
161
- inputs=[main_audio],
162
- outputs=[active_filepath]
163
- )
164
-
165
-
166
- # Fonctions pour mettre Γ  jour l'Γ©tat de l'interface
167
- def start_streaming_ui():
168
- logging.info("UI : DΓ©marrage du streaming. DΓ©sactivation des contrΓ΄les.")
169
- return {
170
- start_button: gr.Button(interactive=False),
171
- stop_button: gr.Button(interactive=True),
172
- main_audio: gr.Audio(visible=False),
173
- }
174
-
175
- def stop_streaming_ui():
176
- logging.info("UI : ArrΓͺt du streaming. RΓ©activation des contrΓ΄les.")
177
- return {
178
- start_button: gr.Button(interactive=True),
179
- stop_button: gr.Button(interactive=False),
180
- main_audio: gr.Audio(
181
- label="Source Audio",
182
- sources=["upload", "microphone"], # Combine les deux sources
183
- type="filepath",
184
- value=active_filepath.value,
185
- visible=True
186
- ),
187
- }
188
-
189
-
190
- ui_components = [
191
- start_button, stop_button,
192
- main_audio,
193
- ]
194
-
195
- stream_event = webrtc_stream.stream(
196
- fn=read_and_stream_audio,
197
- inputs=[active_filepath],
198
- outputs=[webrtc_stream],
199
- trigger=start_button.click,
200
- concurrency_id="audio_stream", # ID de concurrence
201
- concurrency_limit=10
202
- )
203
-
204
- # Mettre Γ  jour l'interface au clic sur START
205
- start_button.click(
206
- fn=start_streaming_ui,
207
- outputs=ui_components
208
- )
209
-
210
- # Correction : S'assurer que le stream est bien annulΓ©
211
- stop_button.click(
212
- fn=stop_streaming,
213
- outputs=[webrtc_stream],
214
- ).then(
215
- fn=stop_streaming_ui, # ENSUITE, mettre Γ  jour l'interface
216
- inputs=None,
217
- outputs=ui_components
218
- )
 
 
 
 
 
 
 
219
 
220
 
221
  if __name__ == "__main__":
222
- demo.queue(max_size=10, api_open=False).launch(show_api=False, debug=True)
 
1
+ from app.logger_config import (
2
+ logger as logging,
3
+ DEBUG
4
+ )
5
  import numpy as np
6
  import gradio as gr
7
  import asyncio
8
  from fastrtc.webrtc import WebRTC
9
+ from fastrtc.utils import AdditionalOutputs
10
  from pydub import AudioSegment
11
  import time
12
+ import os
13
  from gradio.utils import get_space
14
 
 
15
  from app.utils import (
16
+ generate_coturn_config,
17
+ raise_function
18
  )
19
+ from app.session_utils import (
20
+ on_load,
21
+ on_unload,
22
+ get_active_sessions,
23
+ register_session,
24
+ reset_all_active_sessions,
25
+ )
26
+
27
+ from app.ui_utils import (
28
+ SUPPORTED_LANGS_MAP,
29
+ EXAMPLE_CONFIGS,
30
+ apply_preset_if_example,
31
+ reset_to_defaults,
32
+ summarize_config,
33
+ handle_additional_outputs,
34
+ get_custom_theme,
35
+ on_file_load
36
+ )
37
+
38
+ from app.stream_utils import (
39
+ read_and_stream_audio,
40
+ stop_streaming
41
+ )
42
+
43
+ # --------------------------------------------------------
44
+ # Initialization
45
+ # --------------------------------------------------------
46
+ reset_all_active_sessions()
47
 
48
+ theme,css_style = get_custom_theme()
49
+
50
+ with gr.Blocks(theme=theme, css=css_style) as demo:
51
+ session_hash = gr.State()
52
+ session_hash_box = gr.Textbox(label="Session ID", interactive=False, visible=DEBUG)
53
+ with gr.Accordion("πŸ“Š Active Sessions", open=True ,visible=DEBUG):
54
+ sessions_table = gr.DataFrame(
55
+ headers=["session_hash", "file", "start_time", "status"],
56
+ interactive=False,
57
+ wrap=True,
58
+ max_height=200,
59
+ )
60
+ gr.Timer(3.0).tick(fn=get_active_sessions, outputs=sessions_table)
61
+
62
+ demo.load(fn=on_load, inputs=None, outputs=[session_hash, session_hash_box])
63
+ demo.unload(on_unload)
64
+ stop_streaming_flags = gr.State(value={"stop": False})
65
+ active_filepath = gr.State(value=next(iter(EXAMPLE_CONFIGS)))
66
+
67
+ with gr.Walkthrough(selected=0) as walkthrough:
68
+ # === STEP 1 ===
69
+ with gr.Step("Audio", id=0) as audio_source_step:
70
+ gr.Markdown(
71
+ """
72
+ ### Step 1: Upload or Record an Audio File
73
+ You can upload an existing file or record directly from your microphone.
74
+ Accepted formats: **.wav**, **.mp3**, **.flac**
75
+ Maximum length recommended: **60 seconds**
76
+ """
 
 
 
 
 
 
 
 
 
 
 
 
77
  )
78
 
79
+ with gr.Group():
80
+ with gr.Column():
81
+ main_audio = gr.Audio(
82
+ label="Audio Input",
83
+ sources=["upload", "microphone"],
84
+ type="filepath",
85
+ interactive=True
86
+ )
87
+
88
+ with gr.Accordion("Need a quick test? Try one of the sample audios below", open=True):
89
+ examples = gr.Examples(
90
+ examples=list(EXAMPLE_CONFIGS.keys()),
91
+ inputs=main_audio,
92
+ label=None,
93
+ examples_per_page=3
94
+ )
95
+ gr.Markdown(
96
+ """
97
+ πŸ”Ή **english_meeting.wav** – Short business meeting in English
98
+ πŸ”Ή **french_news.wav** – Excerpt from a French radio broadcast
99
+ πŸ”Ή **spanish_podcast.wav** – Segment from a Spanish-language podcast
100
+ """
101
+ )
102
+
103
+ btn = gr.Button("Proceed to Streaming", visible=True)
104
+
105
+ # ui_components_one = [active_filepath, btn]
106
+ # main_audio.change(fn=on_file_load, inputs=[main_audio], outputs=ui_components_one)
107
+ # main_audio.stop_recording(fn=on_file_load, inputs=[main_audio], outputs=ui_components_one)
108
+ # main_audio.clear(fn=on_file_load, inputs=[main_audio], outputs=ui_components_one)
109
+
110
+ btn.click(lambda: gr.Walkthrough(selected=1), outputs=walkthrough)
111
+
112
+ # === STEP 2 ===
113
+ with gr.Step("Stream", id=1) as audio_stream:
114
+ gr.Markdown("### Step 2: Start audio streaming")
115
+ with gr.Group():
116
+ with gr.Column():
117
+ webrtc_stream = WebRTC(
118
+ label="Live Stream",
119
+ mode="receive",
120
+ modality="audio",
121
+ rtc_configuration=generate_coturn_config(),
122
+ visible=True,
123
+ )
124
+ start_button = gr.Button("Start Streaming")
125
+
126
+ webrtc_stream.stream(
127
+ fn=read_and_stream_audio,
128
+ inputs=[active_filepath, session_hash, stop_streaming_flags],
129
+ outputs=[webrtc_stream],
130
+ trigger=start_button.click,
131
+ concurrency_id="audio_stream",
132
+ concurrency_limit=10,
 
 
 
 
 
 
 
 
 
133
  )
134
+
135
+ go_to_config = gr.Button("Go to Configuration", visible=False)
136
+ go_to_config.click(lambda: gr.Walkthrough(selected=2), outputs=walkthrough)
137
+
138
+ # === STEP 3 ===
139
+ with gr.Step("Configuration", id=2):
140
+ gr.Markdown("## Step 3: Configure the Task")
141
+
142
+ task_type = gr.Radio(["Transcription", "Translation"], value="Transcription", label="Task Type")
143
+ lang_source = gr.Dropdown(list(SUPPORTED_LANGS_MAP.keys()), value="French", label="Source Language")
144
+ lang_target = gr.Dropdown(list(SUPPORTED_LANGS_MAP.keys()), value="English", label="Target Language", visible=False)
145
+
146
+ with gr.Accordion("Advanced Configuration", open=False):
147
+ chunk_secs = gr.Number(value=1.0, label="chunk_secs", precision=1)
148
+ left_context_secs = gr.Number(value=20.0, label="left_context_secs", precision=1)
149
+ right_context_secs = gr.Number(value=0.5, label="right_context_secs", precision=1)
150
+ streaming_policy = gr.Dropdown(["waitk", "alignatt"], value="waitk", label="decoding.streaming_policy")
151
+ alignatt_thr = gr.Number(value=8, label="alignatt_thr", precision=0)
152
+ waitk_lagging = gr.Number(value=2, label="waitk_lagging", precision=0)
153
+ exclude_sink_frames = gr.Number(value=8, label="exclude_sink_frames", precision=0)
154
+ xatt_scores_layer = gr.Number(value=-2, label="xatt_scores_layer", precision=0)
155
+ hallucinations_detector = gr.Checkbox(value=True, label="hallucinations_detector")
156
+
157
+ with gr.Row():
158
+ auto_apply_presets = gr.Checkbox(value=True, label="Auto-apply presets for sample audios")
159
+ reset_btn = gr.Button("Reset to defaults")
160
+
161
+ summary_box = gr.Textbox(label="Configuration Summary", lines=10, interactive=False)
162
+
163
+ # --- Events ---
164
+ task_type.change(
165
+ fn=lambda t: gr.update(visible=(t == "Translation")),
166
+ inputs=task_type,
167
+ outputs=lang_target,
168
+ queue=False
169
+ )
170
+
171
+ inputs_list = [
172
+ task_type, lang_source, lang_target,
173
+ chunk_secs, left_context_secs, right_context_secs,
174
+ streaming_policy, alignatt_thr, waitk_lagging,
175
+ exclude_sink_frames, xatt_scores_layer, hallucinations_detector
176
+ ]
177
+ for inp in inputs_list:
178
+ inp.change(
179
+ fn=summarize_config,
180
+ inputs=inputs_list,
181
+ outputs=summary_box,
182
+ queue=False
183
+ )
184
+
185
+ # Apply preset or not
186
+ main_audio.change(
187
+ fn=apply_preset_if_example,
188
+ inputs=[main_audio, auto_apply_presets],
189
+ outputs=[
190
+ task_type, lang_source, lang_target,
191
+ chunk_secs, left_context_secs, right_context_secs,
192
+ streaming_policy, alignatt_thr, waitk_lagging,
193
+ exclude_sink_frames, xatt_scores_layer, hallucinations_detector,
194
+ summary_box
195
+ ],
196
+ queue=False
197
+ )
198
+
199
+ # Reset defaults
200
+ reset_btn.click(
201
+ fn=reset_to_defaults,
202
+ inputs=None,
203
+ outputs=[
204
+ task_type, lang_source, lang_target,
205
+ chunk_secs, left_context_secs, right_context_secs,
206
+ streaming_policy, alignatt_thr, waitk_lagging,
207
+ exclude_sink_frames, xatt_scores_layer, hallucinations_detector,
208
+ summary_box
209
+ ],
210
+ queue=False
211
  )
212
+
213
+ go_to_task = gr.Button("Go to Task")
214
+ go_to_task.click(lambda: gr.Walkthrough(selected=3), outputs=walkthrough)
215
+
216
+ # === STEP 4 ===
217
+ with gr.Step("Task", id=3) as task_step:
218
+ gr.Markdown("## Step 4: Start the Task")
219
+
220
+ with gr.Group():
221
+ with gr.Column():
222
+ status_slider = gr.Slider(
223
+ 0, 100,
224
+ value=0,
225
+ label="Streaming Progress",
226
+ interactive=False,
227
+ visible=False
228
+ )
229
+
230
+ transcription_output = gr.Textbox(
231
+ label="Transcription / Translation Result",
232
+ placeholder="The output text will appear here...",
233
+ lines=10,
234
+ interactive=False,
235
+ visible=True
236
+ )
237
+
238
+ start_task_button = gr.Button("Start Task", visible=True)
239
+ stop_button = gr.Button("Stop Streaming", visible=False)
240
+ stop_task_button = gr.Button("Stop Task", visible=False)
241
+
242
+ stop_button.click(
243
+ fn=stop_streaming,
244
+ inputs=[session_hash, stop_streaming_flags],
245
+ outputs=[stop_streaming_flags],
246
+ )
247
+
248
+ def stop_task_fn():
249
+ return "Task stopped by user."
250
+
251
+ stop_task_button.click(
252
+ fn=stop_task_fn,
253
+ inputs=None,
254
+ outputs=transcription_output
255
+ )
256
+
257
+ ui_components = [
258
+ start_button, stop_button,
259
+ go_to_config, audio_source_step, status_slider
260
+ ]
261
+
262
+ webrtc_stream.on_additional_outputs(
263
+ fn=handle_additional_outputs,
264
+ outputs=ui_components,
265
+ concurrency_id="additional_outputs_audio_stream",
266
+ concurrency_limit=10,
267
+ )
268
+
269
+ # def start_transcription(
270
+ # session_hash, stop_streaming_flags,
271
+ # task_type, lang_source, lang_target,
272
+ # chunk_secs, left_context_secs, right_context_secs,
273
+ # streaming_policy, alignatt_thr, waitk_lagging,
274
+ # exclude_sink_frames, xatt_scores_layer, hallucinations_detector
275
+ # ):
276
+ # if task_type == "Translation":
277
+ # return f"Translation completed ({lang_source} β†’ {lang_target})\n\nTranslated text:\nLorem ipsum..."
278
+ # else:
279
+ # return f"Transcription completed ({lang_source})\n\nTranscribed text:\nHello everyone, this is a test audio stream..."
280
+
281
+ # start_task_button.click(
282
+ # fn=start_transcription,
283
+ # inputs=[
284
+ # session_hash, stop_streaming_flags,
285
+ # task_type, lang_source, lang_target,
286
+ # chunk_secs, left_context_secs, right_context_secs,
287
+ # streaming_policy, alignatt_thr, waitk_lagging,
288
+ # exclude_sink_frames, xatt_scores_layer, hallucinations_detector
289
+ # ],
290
+ # outputs=transcription_output
291
+ # )
292
+
293
+ # def toggle_task_buttons():
294
+ # return (
295
+ # gr.update(visible=False),
296
+ # gr.update(visible=True),
297
+ # gr.update(visible=True)
298
+ # )
299
+
300
+ # start_task_button.click(
301
+ # fn=toggle_task_buttons,
302
+ # inputs=None,
303
+ # outputs=[start_task_button, stop_task_button, stop_button],
304
+ # queue=False
305
+ # )
306
 
307
 
308
  if __name__ == "__main__":
309
+ demo.queue(max_size=10, api_open=False).launch(show_api=False, debug=True)
app/logger_config.py CHANGED
@@ -11,7 +11,6 @@ import os
11
  # nemo_logger.remove_stream_handlers()
12
 
13
  DEBUG = os.getenv("DEBUG", "false").lower() == "true"
14
- print(DEBUG)
15
  # Create "logs" directory if it doesn't exist
16
  os.makedirs("logs", exist_ok=True)
17
 
 
11
  # nemo_logger.remove_stream_handlers()
12
 
13
  DEBUG = os.getenv("DEBUG", "false").lower() == "true"
 
14
  # Create "logs" directory if it doesn't exist
15
  os.makedirs("logs", exist_ok=True)
16
 
app/old_session_utils.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import uuid
4
+ import shutil
5
+ from datetime import datetime
6
+ from app.logger_config import logger as logging
7
+ import gradio as gr
8
+ # TMP_DIR = "/tmp/canary_aed_streaming"
9
+
10
+ TMP_DIR = os.getenv("TMP_DIR", "/tmp/canary_aed_streaming")
11
+ ACTIVE_SESSIONS_FILE = os.path.join(TMP_DIR, "active_sessions.json")
12
+
13
+
14
+ # ---------------------------
15
+ # Helper to manage the JSON
16
+ # ---------------------------
17
+ def _read_sessions():
18
+ if not os.path.exists(ACTIVE_SESSIONS_FILE):
19
+ return {}
20
+ try:
21
+ with open(ACTIVE_SESSIONS_FILE, "r") as f:
22
+ return json.load(f)
23
+ except Exception:
24
+ return {}
25
+
26
+
27
+ def _write_sessions(data):
28
+ os.makedirs(os.path.dirname(ACTIVE_SESSIONS_FILE), exist_ok=True)
29
+ with open(ACTIVE_SESSIONS_FILE, "w") as f:
30
+ json.dump(data, f, indent=2)
31
+
32
+
33
+ # ---------------------------
34
+ # LOAD
35
+ # ---------------------------
36
+ def on_load(request: gr.Request):
37
+ """Called when a new visitor opens the app."""
38
+ sid = request.session_hash # βœ… Directly use session_hash as unique ID
39
+ sessions = _read_sessions()
40
+
41
+ sessions[sid] = {
42
+ "session_id": sid,
43
+ "file": "",
44
+ "start_time": datetime.utcnow().strftime("%H:%M:%S"),
45
+ "status": "active",
46
+ }
47
+
48
+ _write_sessions(sessions)
49
+ logging.info(f"[{sid}] Session registered (on_load).")
50
+
51
+ return sid, sid # can be used as gr.State + display
52
+
53
+
54
+ # ---------------------------
55
+ # UNLOAD
56
+ # ---------------------------
57
+ def on_unload(request: gr.Request):
58
+ """Called when the visitor closes or refreshes the app."""
59
+ sid = request.session_hash
60
+ sessions = _read_sessions()
61
+
62
+ if sid in sessions:
63
+ create_stop_flag(sid)
64
+ sessions.pop(sid)
65
+ _write_sessions(sessions)
66
+ remove_session_data(sid)
67
+ unregister_session(sid)
68
+ logging.info(f"[{sid}] Session removed (on_unload).")
69
+ else:
70
+ logging.info(f"[{sid}] No active session found to remove.")
71
+
72
+ def ensure_tmp_dir():
73
+ """Ensures the base temporary directory exists."""
74
+ try:
75
+ os.makedirs(TMP_DIR, exist_ok=True)
76
+ except Exception as e:
77
+ logging.error(f"Failed to create tmp directory {TMP_DIR}: {e}")
78
+
79
+
80
+ def reset_all_active_sessions():
81
+ """Removes all temporary session files and folders at startup."""
82
+ ensure_tmp_dir()
83
+
84
+ try:
85
+ # --- Remove active sessions file ---
86
+ if os.path.exists(ACTIVE_SESSIONS_FILE):
87
+ os.remove(ACTIVE_SESSIONS_FILE)
88
+ logging.info("Active sessions file reset at startup.")
89
+ else:
90
+ logging.debug("No active sessions file found to reset.")
91
+
92
+ # --- Clean progress files ---
93
+ for f in os.listdir(TMP_DIR):
94
+ if f.startswith("progress_") and f.endswith(".json"):
95
+ path = os.path.join(TMP_DIR, f)
96
+ try:
97
+ os.remove(path)
98
+ logging.debug(f"Removed leftover progress file: {f}")
99
+ except Exception as e:
100
+ logging.warning(f"Failed to remove progress file {f}: {e}")
101
+
102
+ # --- Clean all flag files (stream + transcribe) ---
103
+ for f in os.listdir(TMP_DIR):
104
+ if (
105
+ f.startswith("stream_stop_flag_")
106
+ or f.startswith("transcribe_stop_flag_")
107
+ or f.startswith("transcribe_active_")
108
+ ) and f.endswith(".txt"):
109
+ path = os.path.join(TMP_DIR, f)
110
+ try:
111
+ os.remove(path)
112
+ logging.debug(f"Removed leftover flag file: {f}")
113
+ except Exception as e:
114
+ logging.warning(f"Failed to remove flag file {f}: {e}")
115
+
116
+ # --- Clean chunk directories ---
117
+ for name in os.listdir(TMP_DIR):
118
+ path = os.path.join(TMP_DIR, name)
119
+ if os.path.isdir(path) and name.startswith("chunks_"):
120
+ try:
121
+ shutil.rmtree(path)
122
+ logging.debug(f"Removed leftover chunk folder: {name}")
123
+ except Exception as e:
124
+ logging.warning(f"Failed to remove chunk folder {name}: {e}")
125
+
126
+ logging.info("Temporary session cleanup completed successfully.")
127
+
128
+ except Exception as e:
129
+ logging.error(f"Error resetting active sessions: {e}")
130
+
131
+ def remove_session_data(session_id: str):
132
+ """Removes all temporary files and data related to a specific session."""
133
+ if not session_id:
134
+ logging.warning("reset_session() called without a valid session_id.")
135
+ return
136
+
137
+ try:
138
+ # --- Remove session from active_sessions.json ---
139
+ if os.path.exists(ACTIVE_SESSIONS_FILE):
140
+ try:
141
+ with open(ACTIVE_SESSIONS_FILE, "r") as f:
142
+ data = json.load(f)
143
+ if session_id in data:
144
+ data.pop(session_id)
145
+ with open(ACTIVE_SESSIONS_FILE, "w") as f:
146
+ json.dump(data, f, indent=2)
147
+ logging.debug(f"[{session_id}] Removed from active_sessions.json.")
148
+ except Exception as e:
149
+ logging.warning(f"[{session_id}] Failed to update active_sessions.json: {e}")
150
+
151
+ # --- Define all possible session file patterns ---
152
+ files_to_remove = [
153
+ f"progress_{session_id}.json",
154
+ # f"stream_stop_flag_{session_id}.txt",
155
+ f"transcribe_stop_flag_{session_id}.txt",
156
+ f"transcribe_active_{session_id}.txt",
157
+ ]
158
+
159
+ # --- Remove all temporary files ---
160
+ for fname in files_to_remove:
161
+ path = os.path.join(TMP_DIR, fname)
162
+ if os.path.exists(path):
163
+ try:
164
+ os.remove(path)
165
+ logging.debug(f"[{session_id}] Removed file: {fname}")
166
+ except Exception as e:
167
+ logging.warning(f"[{session_id}] Failed to remove file {fname}: {e}")
168
+
169
+ # --- Remove chunk folder if exists ---
170
+ chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_id}")
171
+ if os.path.isdir(chunk_dir):
172
+ try:
173
+ shutil.rmtree(chunk_dir)
174
+ logging.debug(f"[{session_id}] Removed chunk folder: chunks_{session_id}")
175
+ except Exception as e:
176
+ logging.warning(f"[{session_id}] Failed to remove chunk folder: {e}")
177
+
178
+ logging.info(f"[{session_id}] Session fully reset.")
179
+
180
+ except Exception as e:
181
+ logging.error(f"[{session_id}] Error during reset_session: {e}")
182
+ def generate_session_id() -> str:
183
+ """Generates a unique session ID."""
184
+ sid = str(uuid.uuid4())
185
+ logging.debug(f"[{sid}] New session created.")
186
+ return sid
187
+
188
+
189
+ def register_session(session_id: str, filepath: str):
190
+ """Registers a new session."""
191
+ ensure_tmp_dir()
192
+ data = {}
193
+ if os.path.exists(ACTIVE_SESSIONS_FILE):
194
+ with open(ACTIVE_SESSIONS_FILE, "r") as f:
195
+ try:
196
+ data = json.load(f)
197
+ except Exception:
198
+ data = {}
199
+
200
+ data[session_id] = {
201
+ "session_id": session_id,
202
+ "file": filepath,
203
+ "start_time": datetime.utcnow().strftime("%H:%M:%S"),
204
+ "status": "active",
205
+ }
206
+
207
+ with open(ACTIVE_SESSIONS_FILE, "w") as f:
208
+ json.dump(data, f)
209
+
210
+ logging.debug(f"[{session_id}] Session registered in active_sessions.json.")
211
+
212
+
213
+ def unregister_session(session_id: str):
214
+ """Removes a session from the registry."""
215
+ if not os.path.exists(ACTIVE_SESSIONS_FILE):
216
+ return
217
+
218
+ try:
219
+ with open(ACTIVE_SESSIONS_FILE, "r") as f:
220
+ data = json.load(f)
221
+ if session_id in data:
222
+ data.pop(session_id)
223
+ with open(ACTIVE_SESSIONS_FILE, "w") as f:
224
+ json.dump(data, f)
225
+ logging.debug(f"[{session_id}] Session unregistered.")
226
+ except Exception as e:
227
+ logging.error(f"[{session_id}] Error unregistering session: {e}")
228
+
229
+
230
+ def get_active_sessions():
231
+ """Returns active sessions as a list of rows for the DataFrame."""
232
+ if not os.path.exists(ACTIVE_SESSIONS_FILE):
233
+ return []
234
+
235
+ try:
236
+ with open(ACTIVE_SESSIONS_FILE, "r") as f:
237
+ data = json.load(f)
238
+
239
+ rows = [
240
+ [
241
+ s.get("session_id", ""),
242
+ s.get("file", ""),
243
+ s.get("start_time", ""),
244
+ s.get("status", ""),
245
+ ]
246
+ for s in data.values()
247
+ ]
248
+ return rows
249
+ except Exception as e:
250
+ logging.error(f"Error reading active sessions: {e}")
251
+ return []
252
+
253
+
254
+ def stop_file_path(session_id: str) -> str:
255
+ """Returns the stop-flag file path for a given session."""
256
+ ensure_tmp_dir()
257
+ return os.path.join(TMP_DIR, f"stream_stop_flag_{session_id}.txt")
258
+
259
+
260
+ def create_stop_flag(session_id: str):
261
+ """Creates a stop-flag file for this session."""
262
+ path = stop_file_path(session_id)
263
+ with open(path, "w") as f:
264
+ f.write("1")
265
+ logging.info(f"[{session_id}] Stop flag file created at {path}.")
266
+
267
+
268
+ def clear_stop_flag(session_id: str):
269
+ """Deletes the stop-flag file if it exists."""
270
+ path = stop_file_path(session_id)
271
+ if os.path.exists(path):
272
+ os.remove(path)
273
+ logging.debug(f"[{session_id}] Stop flag cleared.")
app/session_utils.py CHANGED
@@ -35,20 +35,20 @@ def _write_sessions(data):
35
  # ---------------------------
36
  def on_load(request: gr.Request):
37
  """Called when a new visitor opens the app."""
38
- sid = request.session_hash # βœ… Directly use session_hash as unique ID
39
  sessions = _read_sessions()
40
 
41
- sessions[sid] = {
42
- "session_id": sid,
43
  "file": "",
44
  "start_time": datetime.utcnow().strftime("%H:%M:%S"),
45
  "status": "active",
46
  }
47
 
48
  _write_sessions(sessions)
49
- logging.info(f"[{sid}] Session registered (on_load).")
50
 
51
- return sid, sid # can be used as gr.State + display
52
 
53
 
54
  # ---------------------------
@@ -60,7 +60,6 @@ def on_unload(request: gr.Request):
60
  sessions = _read_sessions()
61
 
62
  if sid in sessions:
63
- create_stop_flag(sid)
64
  sessions.pop(sid)
65
  _write_sessions(sessions)
66
  remove_session_data(sid)
@@ -88,43 +87,7 @@ def reset_all_active_sessions():
88
  logging.info("Active sessions file reset at startup.")
89
  else:
90
  logging.debug("No active sessions file found to reset.")
91
-
92
- # --- Clean progress files ---
93
- for f in os.listdir(TMP_DIR):
94
- if f.startswith("progress_") and f.endswith(".json"):
95
- path = os.path.join(TMP_DIR, f)
96
- try:
97
- os.remove(path)
98
- logging.debug(f"Removed leftover progress file: {f}")
99
- except Exception as e:
100
- logging.warning(f"Failed to remove progress file {f}: {e}")
101
-
102
- # --- Clean all flag files (stream + transcribe) ---
103
- for f in os.listdir(TMP_DIR):
104
- if (
105
- f.startswith("stream_stop_flag_")
106
- or f.startswith("transcribe_stop_flag_")
107
- or f.startswith("transcribe_active_")
108
- ) and f.endswith(".txt"):
109
- path = os.path.join(TMP_DIR, f)
110
- try:
111
- os.remove(path)
112
- logging.debug(f"Removed leftover flag file: {f}")
113
- except Exception as e:
114
- logging.warning(f"Failed to remove flag file {f}: {e}")
115
-
116
- # --- Clean chunk directories ---
117
- for name in os.listdir(TMP_DIR):
118
- path = os.path.join(TMP_DIR, name)
119
- if os.path.isdir(path) and name.startswith("chunks_"):
120
- try:
121
- shutil.rmtree(path)
122
- logging.debug(f"Removed leftover chunk folder: {name}")
123
- except Exception as e:
124
- logging.warning(f"Failed to remove chunk folder {name}: {e}")
125
-
126
- logging.info("Temporary session cleanup completed successfully.")
127
-
128
  except Exception as e:
129
  logging.error(f"Error resetting active sessions: {e}")
130
 
@@ -198,7 +161,7 @@ def register_session(session_id: str, filepath: str):
198
  data = {}
199
 
200
  data[session_id] = {
201
- "session_id": session_id,
202
  "file": filepath,
203
  "start_time": datetime.utcnow().strftime("%H:%M:%S"),
204
  "status": "active",
@@ -238,7 +201,7 @@ def get_active_sessions():
238
 
239
  rows = [
240
  [
241
- s.get("session_id", ""),
242
  s.get("file", ""),
243
  s.get("start_time", ""),
244
  s.get("status", ""),
@@ -251,23 +214,7 @@ def get_active_sessions():
251
  return []
252
 
253
 
254
- def stop_file_path(session_id: str) -> str:
255
- """Returns the stop-flag file path for a given session."""
256
- ensure_tmp_dir()
257
- return os.path.join(TMP_DIR, f"stream_stop_flag_{session_id}.txt")
258
 
259
 
260
- def create_stop_flag(session_id: str):
261
- """Creates a stop-flag file for this session."""
262
- path = stop_file_path(session_id)
263
- with open(path, "w") as f:
264
- f.write("1")
265
- logging.info(f"[{session_id}] Stop flag file created at {path}.")
266
 
267
 
268
- def clear_stop_flag(session_id: str):
269
- """Deletes the stop-flag file if it exists."""
270
- path = stop_file_path(session_id)
271
- if os.path.exists(path):
272
- os.remove(path)
273
- logging.debug(f"[{session_id}] Stop flag cleared.")
 
35
  # ---------------------------
36
  def on_load(request: gr.Request):
37
  """Called when a new visitor opens the app."""
38
+ session_hash = request.session_hash # βœ… Directly use session_hash as unique ID
39
  sessions = _read_sessions()
40
 
41
+ sessions[session_hash] = {
42
+ "session_hash": session_hash,
43
  "file": "",
44
  "start_time": datetime.utcnow().strftime("%H:%M:%S"),
45
  "status": "active",
46
  }
47
 
48
  _write_sessions(sessions)
49
+ logging.info(f"[{session_hash}] Session registered (on_load).")
50
 
51
+ return session_hash, session_hash # can be used as gr.State + display
52
 
53
 
54
  # ---------------------------
 
60
  sessions = _read_sessions()
61
 
62
  if sid in sessions:
 
63
  sessions.pop(sid)
64
  _write_sessions(sessions)
65
  remove_session_data(sid)
 
87
  logging.info("Active sessions file reset at startup.")
88
  else:
89
  logging.debug("No active sessions file found to reset.")
90
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  except Exception as e:
92
  logging.error(f"Error resetting active sessions: {e}")
93
 
 
161
  data = {}
162
 
163
  data[session_id] = {
164
+ "session_hash": session_id,
165
  "file": filepath,
166
  "start_time": datetime.utcnow().strftime("%H:%M:%S"),
167
  "status": "active",
 
201
 
202
  rows = [
203
  [
204
+ s.get("session_hash", ""),
205
  s.get("file", ""),
206
  s.get("start_time", ""),
207
  s.get("status", ""),
 
214
  return []
215
 
216
 
 
 
 
 
217
 
218
 
 
 
 
 
 
 
219
 
220
 
 
 
 
 
 
 
app/stream_utils.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from app.logger_config import logger as logging
3
+ from fastrtc.utils import AdditionalOutputs
4
+ from pydub import AudioSegment
5
+ import asyncio
6
+ import os
7
+ import time
8
+ import numpy as np
9
+
10
+ # --------------------------------------------------------
11
+ # Utility functions
12
+ # --------------------------------------------------------
13
+
14
+ def read_and_stream_audio(filepath_to_stream: str, session_id: str, stop_streaming_flags: dict):
15
+ """
16
+ Read an audio file and stream it chunk by chunk (1s per chunk).
17
+ Handles errors safely and reports structured messages to the client.
18
+ """
19
+ if not session_id:
20
+ yield from handle_stream_error("unknown", "No session_id provided.", stop_streaming_flags)
21
+ return
22
+
23
+ if not filepath_to_stream or not os.path.exists(filepath_to_stream):
24
+ yield from handle_stream_error(session_id, f"Audio file not found: {filepath_to_stream}", stop_streaming_flags)
25
+ return
26
+
27
+ try:
28
+ segment = AudioSegment.from_file(filepath_to_stream)
29
+ chunk_duration_ms = 1000
30
+ total_chunks = len(segment) // chunk_duration_ms + 1
31
+ logging.info(f"[{session_id}] Starting audio streaming ({total_chunks} chunks).")
32
+
33
+ for i, chunk in enumerate(segment[::chunk_duration_ms]):
34
+ if _is_stop_requested(stop_streaming_flags):
35
+ logging.info(f"[{session_id}] Stop signal received. Terminating stream.")
36
+ break
37
+
38
+ frame_rate = chunk.frame_rate
39
+ samples = np.array(chunk.get_array_of_samples()).reshape(1, -1)
40
+ progress = round(((i + 1) / total_chunks) * 100, 2)
41
+
42
+ yield ((frame_rate, samples), AdditionalOutputs(progress))
43
+ logging.debug(f"[{session_id}] Sent chunk {i+1}/{total_chunks} ({progress}%).")
44
+
45
+ time.sleep(1)
46
+ # raise_function() # Optional injected test exception
47
+
48
+ logging.info(f"[{session_id}] Audio streaming completed successfully.")
49
+
50
+ except asyncio.CancelledError:
51
+ yield from handle_stream_error(session_id, "Streaming cancelled by user.", stop_streaming_flags)
52
+ except FileNotFoundError as e:
53
+ yield from handle_stream_error(session_id, e, stop_streaming_flags)
54
+ except Exception as e:
55
+ yield from handle_stream_error(session_id, e, stop_streaming_flags)
56
+ finally:
57
+ if isinstance(stop_streaming_flags, dict):
58
+ stop_streaming_flags["stop"] = False
59
+ logging.info(f"[{session_id}] Stop flag reset.")
60
+ yield (None, AdditionalOutputs("STREAM_DONE"))
61
+
62
+
63
+
64
+ def handle_stream_error(session_id: str, error: Exception | str, stop_streaming_flags: dict | None = None):
65
+ """
66
+ Handle streaming errors:
67
+ - Log the error
68
+ - Send structured info to client
69
+ - Reset stop flag
70
+ """
71
+ if isinstance(error, Exception):
72
+ msg = f"{type(error).__name__}: {str(error)}"
73
+ else:
74
+ msg = str(error)
75
+
76
+ logging.error(f"[{session_id}] Streaming error: {msg}", exc_info=isinstance(error, Exception))
77
+
78
+ if isinstance(stop_streaming_flags, dict):
79
+ stop_streaming_flags["stop"] = False
80
+
81
+ yield (None, AdditionalOutputs({"error": True, "message": msg}))
82
+ yield (None, AdditionalOutputs("STREAM_DONE"))
83
+
84
+
85
+ def _is_stop_requested(stop_streaming_flags: dict) -> bool:
86
+ """Check if the stop signal was requested."""
87
+ if not isinstance(stop_streaming_flags, dict):
88
+ return False
89
+ return bool(stop_streaming_flags.get("stop", False))
90
+
91
+
92
+
93
+ def stop_streaming(session_id: str, stop_streaming_flags: dict):
94
+ """Trigger the stop flag for active streaming."""
95
+ logging.info(f"[{session_id}] Stop button clicked β€” sending stop signal.")
96
+ if not isinstance(stop_streaming_flags, dict):
97
+ stop_streaming_flags = {"stop": True}
98
+ else:
99
+ stop_streaming_flags["stop"] = True
100
+ return stop_streaming_flags
101
+
102
+
app/ui_utils.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.logger_config import logger as logging
2
+ import gradio as gr
3
+ from pathlib import Path
4
+ import os
5
+ DEFAULT_CONFIG = {
6
+ "task_type": "Transcription",
7
+ "lang_source": "French",
8
+ "lang_target": "English",
9
+ "chunk_secs": 1.0,
10
+ "left_context_secs": 20.0,
11
+ "right_context_secs": 0.5,
12
+ "streaming_policy": "waitk",
13
+ "alignatt_thr": 8,
14
+ "waitk_lagging": 2,
15
+ "exclude_sink_frames": 8,
16
+ "xatt_scores_layer": -2,
17
+ "hallucinations_detector": True,
18
+ }
19
+
20
+
21
+ EXAMPLE_CONFIGS = {
22
+ "data/english_meeting.wav": {
23
+ "task_type": "Transcription", "lang_source": "English", "lang_target": "English",
24
+ "chunk_secs": 1.0, "left_context_secs": 20.0, "right_context_secs": 0.5,
25
+ "streaming_policy": "waitk", "alignatt_thr": 8, "waitk_lagging": 2,
26
+ "exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": True
27
+ },
28
+ "data/french_news.wav": {
29
+ "task_type": "Transcription", "lang_source": "French", "lang_target": "English",
30
+ "chunk_secs": 1.0, "left_context_secs": 15.0, "right_context_secs": 0.3,
31
+ "streaming_policy": "alignatt", "alignatt_thr": 10, "waitk_lagging": 3,
32
+ "exclude_sink_frames": 6, "xatt_scores_layer": -1, "hallucinations_detector": True
33
+ },
34
+ "data/spanish_podcast.wav": {
35
+ "task_type": "Translation", "lang_source": "Spanish", "lang_target": "English",
36
+ "chunk_secs": 1.5, "left_context_secs": 25.0, "right_context_secs": 0.4,
37
+ "streaming_policy": "waitk", "alignatt_thr": 7, "waitk_lagging": 1,
38
+ "exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": False
39
+ }
40
+ }
41
+
42
+ SUPPORTED_LANGS_MAP = {
43
+ "Bulgarian": "bg", "Croatian": "hr", "Czech": "cs", "Danish": "da",
44
+ "Dutch": "nl", "English": "en", "Estonian": "et", "Finnish": "fi",
45
+ "French": "fr", "German": "de", "Greek": "el", "Hungarian": "hu",
46
+ "Italian": "it", "Latvian": "lv", "Lithuanian": "lt", "Maltese": "mt",
47
+ "Polish": "pl", "Portuguese": "pt", "Romanian": "ro", "Slovak": "sk",
48
+ "Slovenian": "sl", "Spanish": "es", "Swedish": "sv", "Russian": "ru", "Ukrainian": "uk"
49
+ }
50
+
51
+
52
+
53
+ # ========== FONCTIONS UTILITAIRES ==========
54
+
55
+ def to_updates(cfg):
56
+ """Map dict -> gr.update list dans l'ordre des sorties."""
57
+ return [
58
+ gr.update(value=cfg["task_type"]),
59
+ gr.update(value=cfg["lang_source"]),
60
+ gr.update(
61
+ value=cfg["lang_target"],
62
+ visible=(cfg["task_type"] == "Translation")
63
+ ),
64
+ gr.update(value=cfg["chunk_secs"]),
65
+ gr.update(value=cfg["left_context_secs"]),
66
+ gr.update(value=cfg["right_context_secs"]),
67
+ gr.update(value=cfg["streaming_policy"]),
68
+ gr.update(value=cfg["alignatt_thr"]),
69
+ gr.update(value=cfg["waitk_lagging"]),
70
+ gr.update(value=cfg["exclude_sink_frames"]),
71
+ gr.update(value=cfg["xatt_scores_layer"]),
72
+ gr.update(value=cfg["hallucinations_detector"]),
73
+ ]
74
+
75
+ def apply_preset_if_example(filepath, auto_apply):
76
+ """Si fichier = exemple ET auto_apply=True -> applique preset. Sinon, ne rien changer."""
77
+ logging.info(f"apply_preset_if_example {filepath} {auto_apply} ")
78
+ if not filepath or not auto_apply:
79
+ updates = [gr.update() for _ in range(12)]
80
+ updates.append(gr.update())
81
+ return tuple(updates)
82
+
83
+ # On compare uniquement le nom de fichier, pas le chemin complet
84
+ file_name = Path(filepath).name
85
+
86
+ # Recherche dans EXAMPLE_CONFIGS par nom de fichier
87
+ cfg = next(
88
+ (config for path, config in EXAMPLE_CONFIGS.items() if Path(path).name == file_name),
89
+ None
90
+ )
91
+
92
+ if not cfg:
93
+ updates = [gr.update() for _ in range(12)]
94
+ updates.append(gr.update())
95
+ return tuple(updates)
96
+
97
+ updates = to_updates(cfg)
98
+ updates.append(gr.update(value=f"Preset applied for: {file_name}"))
99
+ return tuple(updates)
100
+
101
+ def reset_to_defaults():
102
+ """RΓ©initialise tous les champs aux valeurs par dΓ©faut."""
103
+ updates = to_updates(DEFAULT_CONFIG) # 12 champs
104
+ # Ajout du rΓ©sumΓ© (13e sortie)
105
+ updates.append(gr.update(value="Defaults restored."))
106
+ return tuple(updates)
107
+
108
+ def summarize_config(
109
+ task, src, tgt,
110
+ chunk, left, right,
111
+ policy, thr, lag, sink, xatt, halluc
112
+ ):
113
+ txt = f"🧠 **Task:** {task}\n🌐 **Source language:** {src}"
114
+ if task == "Translation":
115
+ txt += f"\n🎯 **Target language:** {tgt}"
116
+ txt += (
117
+ f"\n\n### βš™οΈ Advanced Parameters:\n"
118
+ f"- chunk_secs = {chunk}\n"
119
+ f"- left_context_secs = {left}\n"
120
+ f"- right_context_secs = {right}\n"
121
+ f"- decoding.streaming_policy = {policy}\n"
122
+ f"- decoding.alignatt_thr = {thr}\n"
123
+ f"- decoding.waitk_lagging = {lag}\n"
124
+ f"- decoding.exclude_sink_frames = {sink}\n"
125
+ f"- decoding.xatt_scores_layer = {xatt}\n"
126
+ f"- decoding.hallucinations_detector = {halluc}"
127
+ )
128
+ return txt
129
+
130
+ def handle_additional_outputs( progress_value):
131
+ """
132
+ Update UI elements based on streaming progress or errors.
133
+ Controls button states, audio visibility, and progress slider.
134
+ """
135
+ logging.debug(f"Additional output received: {progress_value}")
136
+ # ui_components = [start_button, stop_button,go_to_task, audio_source_step, status_slider]
137
+
138
+ # Handle structured error message
139
+ non_ok= (
140
+ gr.update(visible=True), # start_button enabled
141
+ gr.update(visible=False), # stop_button disabled
142
+ gr.update(visible=False), # go_to_task disabled
143
+ gr.update(interactive=True), # audio_source_step re-shown
144
+ gr.update(visible=False, value=0), # slider hidden
145
+ )
146
+ if isinstance(progress_value, dict) and progress_value.get("error"):
147
+ msg = progress_value.get("message", "Unknown error.")
148
+ logging.error(f"[stream_ui] Client-side error: {msg}")
149
+ return non_ok
150
+
151
+ try:
152
+ progress = float(progress_value)
153
+ except (ValueError, TypeError):
154
+ progress = 0
155
+
156
+ # --- Stream not started ---
157
+ if progress <= 0:
158
+ return non_ok
159
+
160
+ # --- Stream finished ---
161
+ if progress >= 100:
162
+ return non_ok
163
+
164
+ # --- Stream in progress ---
165
+ return (
166
+ gr.update(visible=False), # start_button disabled
167
+ gr.update(visible=True), # stop_button enabled
168
+ gr.update(visible=True), # go_to_task enabled
169
+ gr.update(interactive=False), # hide audio_source_step
170
+ gr.update(visible=True, value=progress), # show progress
171
+ )
172
+
173
+
174
+ def on_file_load(filepath):
175
+ """
176
+ Update active audio path or reset".
177
+ """
178
+ # Si un fichier est chargΓ© (upload, micro, ou exemple),
179
+ # audio_path ne sera pas None.
180
+ is_visible = filepath is not None
181
+ return filepath, gr.update(visible=is_visible)
182
+
183
+
184
+
185
+ def get_custom_theme() :
186
+ # === Thème personnalisé (studio néon) ===
187
+ theme = gr.themes.Base(
188
+ primary_hue="blue",
189
+ secondary_hue="indigo",
190
+ ).set(
191
+ body_background_fill="#F7F8FA",
192
+ body_text_color="#222222",
193
+ block_border_color="#D0D3D9",
194
+ button_primary_background_fill="#3B82F6",
195
+ button_primary_background_fill_hover="#2563EB",
196
+ button_primary_text_color="#FFFFFF",
197
+ )
198
+
199
+
200
+ css_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "assets", "custom_style.css")
201
+ with open(css_path, encoding="utf-8") as f:
202
+ css_style = f.read()
203
+ return theme, css_style
app/utils.py CHANGED
@@ -5,6 +5,7 @@ import hashlib
5
  import base64
6
  import os
7
  import time
 
8
 
9
  def debug_current_device():
10
  """Safely logs GPU or CPU information without crashing on stateless GPU."""
@@ -85,4 +86,14 @@ def generate_coturn_config():
85
  }
86
  ]
87
  }
88
- return coturn_config
 
 
 
 
 
 
 
 
 
 
 
5
  import base64
6
  import os
7
  import time
8
+ import random
9
 
10
  def debug_current_device():
11
  """Safely logs GPU or CPU information without crashing on stateless GPU."""
 
86
  }
87
  ]
88
  }
89
+ return coturn_config
90
+
91
+
92
+
93
+
94
+
95
+
96
+ def raise_function():
97
+ """Raise an error randomly (1 out of 10 times)."""
98
+ if random.randint(1, 50) == 1:
99
+ raise RuntimeError("Random failure triggered!")
assets/custom_style.css ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --accent-blue: #3B82F6;
3
+ --accent-indigo: #6366F1;
4
+ --light-bg: #FFFFFF;
5
+ --soft-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
6
+ }
7
+ #root, .gradio-container {
8
+ display: flex;
9
+ flex-direction: column;
10
+ align-items: center;
11
+ }
12
+ .gradio-container {
13
+ max-width: 960px;
14
+ min-width: 960px;
15
+ margin: 0 auto !important;
16
+ padding: 1.5rem;
17
+ }
18
+ body {
19
+ background: #F4F6F8;
20
+ display: flex;
21
+ justify-content: center;
22
+ }
23
+ @media (max-width: 768px) {
24
+ .gradio-container {
25
+ max-width: 95%;
26
+ padding: 0.5rem;
27
+ }
28
+ .gr-button {
29
+ width: 100%;
30
+ }
31
+ }
32
+ .gr-block {
33
+ width: 100%;
34
+ margin: 0 auto;
35
+ border-radius: 12px;
36
+ box-shadow: 0 2px 12px rgba(0, 0, 0, 0.05);
37
+ }
38
+ .gr-button {
39
+ background: var(--accent-blue);
40
+ border-radius: 10px;
41
+ box-shadow: var(--soft-shadow);
42
+ transition: all 0.25s ease;
43
+ font-weight: 600;
44
+ text-transform: uppercase;
45
+ }
46
+ .gr-button:hover {
47
+ background: var(--accent-indigo);
48
+ transform: translateY(-2px);
49
+ }
50
+ .gr-block {
51
+ background: var(--light-bg);
52
+ border: 1px solid #E5E7EB;
53
+ border-radius: 12px;
54
+ box-shadow: var(--soft-shadow);
55
+ padding: 12px;
56
+ }
57
+ .gr-markdown h2 {
58
+ color: #1E3A8A;
59
+ font-weight: 700;
60
+ }
61
+ .gr-radio, .gr-dropdown, .gr-number, .gr-checkbox {
62
+ max-width: 320px;
63
+ margin: 0.5rem auto;
64
+ }
65
+ .gr-textbox {
66
+ max-width: 600px;
67
+ margin: 1rem auto;
68
+ }
69
+ .gr-button {
70
+ max-width: 250px;
71
+ margin: 1rem auto;
72
+ display: block;
73
+ }
74
+ /* ==== Custom style for examples section ==== */
75
+ /* === Clean style for audio examples === */
76
+ .gr-examples {
77
+ background: #FFFFFF !important; /* fond blanc */
78
+ border: 1px solid #E5E7EB !important; /* bordure légère */
79
+ border-radius: 10px !important;
80
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.03);
81
+ padding: 0.5rem 1rem !important;
82
+ }
83
+
84
+ .gr-examples .example {
85
+ background: #FAFAFA !important; /* gris ultra clair */
86
+ border: 1px solid #E5E7EB !important;
87
+ border-radius: 8px !important;
88
+ color: #1F2937 !important;
89
+ font-weight: 500;
90
+ transition: all 0.2s ease-in-out;
91
+ }
92
+
93
+ .gr-examples .example:hover {
94
+ background: #EFF6FF !important; /* bleu très clair au survol */
95
+ border-color: #3B82F6 !important;
96
+ transform: translateY(-1px);
97
+ cursor: pointer;
98
+ }
99
+
100
+ .gr-examples-label {
101
+ font-weight: 600;
102
+ color: #1E3A8A;
103
+ margin-bottom: 0.5rem;
104
+ background: transparent !important;
105
+ border: none !important;
106
+ }
107
+
108
+ /* supprime le fond gris hΓ©ritΓ© sur le conteneur interne */
109
+ .gr-panel {
110
+ background: transparent !important;
111
+ border: none !important;
112
+ }
113
+ /* === Smooth step transition animation === */
114
+ [data-testid="block"] {
115
+ opacity: 0;
116
+ ransform: translateY(10px) scale(0.98);;
117
+ transition: opacity 0.5s ease, transform 0.5s ease;
118
+ }
119
+
120
+ /* Step visible (Gradio auto-class) */
121
+ [data-testid="block"].svelte-drum21,
122
+ [data-testid="block"]:not([style*="display: none"]) {
123
+ opacity: 1 !important;
124
+ ransform: translateY(10px) scale(0.98); !important;
125
+ }
126
+
127
+ /* Smooth transitions when changing steps */
128
+ .gr-walkthrough > div {
129
+ transition: all 0.4s ease-in-out;
130
+ }
131
+
132
+ /* Optional: soft highlight on current step */
133
+ .gr-step-header[data-selected="true"] {
134
+ background: #EFF6FF !important;
135
+ border-radius: 6px;
136
+ box-shadow: 0 0 6px rgba(59,130,246,0.3);
137
+ }
data/english_meeting.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea84e513e9d6ef3cb69638fb658d581b9c3979c50559fa0d196d2e3896b7d9c8
3
+ size 21024206
data/french_news.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea84e513e9d6ef3cb69638fb658d581b9c3979c50559fa0d196d2e3896b7d9c8
3
+ size 21024206
data/spanish_podcast.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea84e513e9d6ef3cb69638fb658d581b9c3979c50559fa0d196d2e3896b7d9c8
3
+ size 21024206
old_app.py ADDED
@@ -0,0 +1,427 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.logger_config import logger as logging
2
+ import numpy as np
3
+ import gradio as gr
4
+ import asyncio
5
+ from fastrtc.webrtc import WebRTC
6
+ from fastrtc.utils import AdditionalOutputs
7
+ from pydub import AudioSegment
8
+ import time
9
+ import os
10
+ import json
11
+ import spaces
12
+ from app.utils import generate_coturn_config,raise_function
13
+
14
+ from app.old_session_utils import (
15
+ TMP_DIR,
16
+ generate_session_id,
17
+ register_session,
18
+ unregister_session,
19
+ get_active_sessions,
20
+ stop_file_path,
21
+ create_stop_flag,
22
+ clear_stop_flag,
23
+ reset_all_active_sessions,
24
+ on_load,
25
+ on_unload
26
+
27
+ )
28
+
29
+ # Reset sessions at startup
30
+ reset_all_active_sessions()
31
+
32
+ EXAMPLE_FILES = ["data/bonjour.wav", "data/bonjour2.wav"]
33
+ DEFAULT_FILE = EXAMPLE_FILES[0]
34
+
35
+
36
+ # --------------------------------------------------------
37
+ # STREAMING
38
+ # --------------------------------------------------------
39
+ def read_and_stream_audio(filepath_to_stream: str, session_id: str, chunk_seconds: float):
40
+ """Stream audio chunks and save .npz files only when transcription is active."""
41
+ stop_file = os.path.join(TMP_DIR, f"stream_stop_flag_{session_id}.txt")
42
+ transcribe_flag = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
43
+
44
+ logging.debug(f"[{session_id}] read_and_stream_audio() started with file: {filepath_to_stream}")
45
+
46
+ try:
47
+ if not filepath_to_stream or not os.path.exists(filepath_to_stream):
48
+ logging.error(f"[{session_id}] Audio file not found: {filepath_to_stream}")
49
+ raise f"Audio file not found: {filepath_to_stream}"
50
+ clear_stop_flag(session_id)
51
+ register_session(session_id, filepath_to_stream)
52
+ progress_path = os.path.join(TMP_DIR, f"progress_{session_id}.json")
53
+
54
+
55
+ segment = AudioSegment.from_file(filepath_to_stream)
56
+ chunk_ms = int(chunk_seconds * 1000)
57
+ total_chunks = len(segment) // chunk_ms + 1
58
+
59
+ logging.info(f"[{session_id}] Streaming {total_chunks} chunks ({chunk_seconds:.2f}s each)...")
60
+
61
+ for i, chunk in enumerate(segment[::chunk_ms], start=1):
62
+ if os.path.exists(stop_file):
63
+ logging.info(f"[{session_id}] Stop flag detected at chunk {i}. Ending stream.")
64
+ clear_stop_flag(session_id)
65
+ break
66
+ logging.info(f"[{session_id}] Streaming chunk {i}.")
67
+ iter_start = time.perf_counter()
68
+
69
+ elapsed_s = i * chunk_seconds
70
+ hours, remainder = divmod(int(elapsed_s), 3600)
71
+ minutes, seconds = divmod(remainder, 60)
72
+ elapsed_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
73
+
74
+ percent = round((i / total_chunks) * 100, 2)
75
+ progress_data = {"value": percent, "elapsed": elapsed_str}
76
+ with open(progress_path, "w") as f:
77
+ json.dump(progress_data, f)
78
+ chunk_array = np.array(chunk.get_array_of_samples(), dtype=np.int16)
79
+ rate = chunk.frame_rate
80
+ # Save only if transcription is active
81
+ if os.path.exists(transcribe_flag) :
82
+ chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_id}")
83
+ if not os.path.exists(chunk_dir) :
84
+ os.makedirs(chunk_dir, exist_ok=True)
85
+ npz_path = os.path.join(chunk_dir, f"chunk_{i:05d}.npz")
86
+ np.savez_compressed(npz_path, data=chunk_array, rate=rate)
87
+ logging.debug(f"[{session_id}] Saved chunk {i}/{total_chunks} (transcribe active)")
88
+
89
+ # Stream audio to client
90
+ # yield (rate, chunk_array.reshape(1, -1))
91
+ msg = f"Chunk {i}/{total_chunks}"
92
+ yield ( (rate, chunk_array.reshape(1, -1)), AdditionalOutputs(msg) )
93
+ process_ms = (time.perf_counter() - iter_start) * 1000
94
+ # time.sleep(max(chunk_seconds - (process_ms / 1000.0) - 0.1, 0.01))
95
+ time.sleep(chunk_seconds)
96
+ raise_function()
97
+ logging.info(f"[{session_id}] Streaming completed successfully.")
98
+
99
+ except Exception as e:
100
+ logging.error(f"[{session_id}] Stream error: {e}", exc_info=True)
101
+ finally:
102
+ unregister_session(session_id)
103
+ clear_stop_flag(session_id)
104
+ if os.path.exists(progress_path):
105
+ os.remove(progress_path)
106
+ yield (None, AdditionalOutputs("STREAM_DONE"))
107
+
108
+
109
+ # --------------------------------------------------------
110
+ # TRANSCRIPTION
111
+ # --------------------------------------------------------
112
+ @spaces.GPU
113
+ def transcribe(session_id: str):
114
+ """Continuously read and delete .npz chunks while transcription is active."""
115
+ active_flag = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
116
+ with open(active_flag, "w") as f:
117
+ f.write("1")
118
+ logging.info(f"[{session_id}] Transcription started.")
119
+ chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_id}")
120
+
121
+
122
+ try:
123
+ logging.info(f"[{session_id}] Transcription loop started.")
124
+ while os.path.exists(active_flag):
125
+ if not os.path.exists(chunk_dir):
126
+ logging.warning(f"[{session_id}] No chunk directory found for transcription.")
127
+ time.sleep(0.25)
128
+ continue
129
+ files = sorted(f for f in os.listdir(chunk_dir) if f.endswith(".npz"))
130
+ if not files:
131
+ time.sleep(0.25)
132
+ continue
133
+
134
+ for fname in files:
135
+ fpath = os.path.join(chunk_dir, fname)
136
+ try:
137
+ npz = np.load(fpath)
138
+ samples = npz["data"]
139
+ rate = int(npz["rate"])
140
+
141
+ text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz"
142
+ logging.debug(f"[{session_id}] {text}")
143
+
144
+ os.remove(fpath)
145
+ logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
146
+ except Exception as e:
147
+ logging.error(f"[{session_id}] Error processing {fname}: {e}")
148
+ continue
149
+
150
+ time.sleep(0.25)
151
+ raise_function()
152
+ logging.info(f"[{session_id}] Transcription loop ended (flag removed).")
153
+
154
+ except Exception as e:
155
+ logging.error(f"[{session_id}] Transcription error: {e}", exc_info=True)
156
+ finally:
157
+ transcribe_active = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
158
+ if os.path.exists(transcribe_active):
159
+ os.remove(transcribe_active)
160
+ logging.info(f"[{session_id}] Transcription stopped.")
161
+ try:
162
+ if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
163
+ os.rmdir(chunk_dir)
164
+ logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
165
+ except Exception as e:
166
+ logging.error(f"[{session_id}] Cleanup error: {e}")
167
+ logging.info(f"[{session_id}] Exiting transcription loop.")
168
+ return {
169
+ start_transcribe: gr.update(interactive=True),
170
+ stop_transcribe: gr.update(interactive=False),
171
+ progress_text: gr.update(value="πŸ›‘ Transcription stopped."),
172
+ }
173
+
174
+
175
+ # --------------------------------------------------------
176
+ # STOP STREAMING
177
+ # --------------------------------------------------------
178
+ # def stop_streaming(session_id: str):
179
+ # create_stop_flag(session_id)
180
+ # logging.info(f"[{session_id}] Stop button clicked β†’ stop flag created.")
181
+ # return None
182
+
183
+
184
+ def get_session_progress(session_id: str):
185
+ """Read streaming progress and return slider position + elapsed time."""
186
+ progress_path = os.path.join(TMP_DIR, f"progress_{session_id}.json")
187
+ if not os.path.exists(progress_path):
188
+ return 0.0, "00:00:00"
189
+ try:
190
+ with open(progress_path, "r") as f:
191
+ data = json.load(f)
192
+ value = data.get("value", 0.0)
193
+ elapsed = data.get("elapsed", "00:00:00")
194
+ return value, elapsed
195
+ except Exception:
196
+ return 0.0, "00:00:00"
197
+
198
+
199
+ def handle_additional_outputs(message):
200
+ """Called each time a new AdditionalOutputs is received."""
201
+ logging.debug(f"πŸ“‘ Additional output received: {message}")
202
+
203
+ if message == "STREAM_DONE":
204
+ return "βœ… Streaming finished"
205
+ elif message:
206
+ return f"πŸ“‘ {message}"
207
+ else:
208
+ return ""
209
+
210
+ # --------------------------------------------------------
211
+ # UI
212
+ # --------------------------------------------------------
213
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
214
+ gr.Markdown(
215
+ "## 🎧 WebRTC Audio Streamer (Multi-user)\n"
216
+ "Each user controls their own stream. Transcription runs only during streaming."
217
+ )
218
+
219
+ session_id = gr.State()
220
+ sid_box = gr.Textbox(label="Session ID", interactive=False)
221
+ demo.load(fn=on_load, inputs=None, outputs=[session_id, sid_box])
222
+ demo.unload(on_unload)
223
+ active_filepath = gr.State(value=DEFAULT_FILE)
224
+
225
+ with gr.Row(equal_height=True):
226
+ with gr.Column(elem_id="column_source", scale=1):
227
+ with gr.Group(elem_id="centered_content"):
228
+ main_audio = gr.Audio(
229
+ label="Audio Source",
230
+ sources=["upload", "microphone"],
231
+ type="filepath",
232
+ value=DEFAULT_FILE,
233
+ )
234
+
235
+ chunk_slider = gr.Slider(
236
+ label="Chunk Duration (seconds)",
237
+ minimum=0.5,
238
+ maximum=5.0,
239
+ value=1.0,
240
+ step=0.5,
241
+ interactive=True,
242
+ )
243
+
244
+ progress_bar = gr.Slider(
245
+ label="Streaming Progress (%)",
246
+ minimum=0,
247
+ maximum=100,
248
+ value=0,
249
+ step=0.1,
250
+ interactive=False,
251
+ visible=False,
252
+ )
253
+
254
+ progress_text = gr.Textbox(
255
+ label="Elapsed Time (hh:mm:ss)",
256
+ interactive=False,
257
+ visible=False,
258
+ )
259
+
260
+ with gr.Row():
261
+ start_button = gr.Button("▢️ Start Streaming", variant="primary")
262
+ stop_button = gr.Button("⏹️ Stop Streaming", variant="stop", interactive=False)
263
+
264
+ with gr.Column():
265
+ status_box = gr.Textbox(label="Status", interactive=False)
266
+ webrtc_stream = WebRTC(
267
+ label="Audio Stream",
268
+ mode="receive",
269
+ modality="audio",
270
+ rtc_configuration=generate_coturn_config(),
271
+ visible=True,
272
+
273
+
274
+ )
275
+
276
+
277
+
278
+ # --- Transcription Controls ---
279
+ with gr.Row(equal_height=True):
280
+ with gr.Column():
281
+ start_transcribe = gr.Button("πŸŽ™οΈ Start Transcribe", interactive=False)
282
+ stop_transcribe = gr.Button("πŸ›‘ Stop Transcribe", interactive=False)
283
+
284
+ # --- UI Logic ---
285
+ def start_streaming(session_id):
286
+ return {
287
+ start_button: gr.update(interactive=False),
288
+ stop_button: gr.update(interactive=True),
289
+ start_transcribe: gr.update(interactive=True),
290
+ stop_transcribe: gr.update(interactive=False),
291
+ chunk_slider: gr.update(interactive=False),
292
+ main_audio: gr.update(visible=False),
293
+ progress_bar: gr.update(value=0, visible=True),
294
+ progress_text: gr.update(value="00:00:00", visible=True),
295
+ }
296
+
297
+ def stop_streaming(session_id):
298
+ logging.debug(f"[{session_id}] UI: Stop clicked β†’ restoring controls.")
299
+ create_stop_flag(session_id)
300
+ return {
301
+ webrtc_stream : None,
302
+ start_button: gr.update(interactive=True),
303
+ stop_button: gr.update(interactive=False),
304
+ start_transcribe: gr.update(interactive=False),
305
+ stop_transcribe: gr.update(interactive=False),
306
+ chunk_slider: gr.update(interactive=True),
307
+ main_audio: gr.update(visible=True),
308
+ progress_bar: gr.update(value=0, visible=False),
309
+ progress_text: gr.update(value="00:00:00", visible=False),
310
+ }
311
+
312
# Components whose interactivity/visibility toggles with the stream state.
ui_components = [
    start_button, stop_button, start_transcribe, stop_transcribe,
    chunk_slider, main_audio, progress_bar, progress_text,
]

# --- Streaming event ---
# The generator is triggered by the Start button and yields audio chunks
# back into the WebRTC component.
webrtc_stream.stream(
    fn=read_and_stream_audio,
    inputs=[active_filepath, session_id, chunk_slider],
    outputs=[webrtc_stream],
    trigger=start_button.click,
    concurrency_limit=20,    # cap on simultaneous receive streams
    concurrency_id="receive",
)
# Side-channel values yielded by the generator (status strings) land here.
webrtc_stream.on_additional_outputs(
    fn=handle_additional_outputs,
    outputs=[status_box],
)

# Start click also locks the controls (the stream itself is triggered above).
start_button.click(fn=start_streaming, inputs=[session_id], outputs=ui_components)

# Stop click clears the track (webrtc_stream -> None) and restores the controls.
stop_button.click(fn=stop_streaming, inputs=[session_id], outputs=[webrtc_stream] + ui_components)
351
+ # --- Transcription control logic ---
352
def start_transcribe_ui(session_id: str):
    """Flip the transcribe buttons into the 'running' state.

    NOTE(review): this handler only updates the UI β€” it does not create
    the transcription flag itself; presumably that is done by the chained
    `transcribe` call wired via `start_transcribe.click(...).then(...)`.
    Confirm `transcribe` creates the transcribe_active flag file.
    """
    return {
        start_transcribe: gr.update(interactive=False),
        stop_transcribe: gr.update(interactive=True),
        progress_text: gr.update(value="πŸŽ™οΈ Transcription started..."),
    }
359
+
360
def stop_transcribe_ui(session_id: str):
    """Stop transcription by removing its flag file and reset the UI.

    Deletes the per-session `transcribe_active_<session_id>.txt` flag in
    TMP_DIR β€” presumably polled by the transcription task as its run
    condition (confirm against `transcribe`).

    Args:
        session_id: Per-browser session identifier.

    Returns:
        A component -> gr.update mapping restoring the idle transcribe UI.
    """
    transcribe_active = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
    # EAFP: remove directly and tolerate a missing file β€” the original
    # exists()+remove() pair was racy if the flag vanished in between.
    try:
        os.remove(transcribe_active)
    except FileNotFoundError:
        pass
    return {
        start_transcribe: gr.update(interactive=True),
        stop_transcribe: gr.update(interactive=False),
        progress_text: gr.update(value="πŸ›‘ Transcription stopped."),
    }
370
+
371
+
372
# Start: first flip the UI into the 'running' state, then chain the
# (long-lived) transcription job so the button feedback is immediate.
start_transcribe.click(
    fn=start_transcribe_ui,
    inputs=[session_id],
    outputs=[start_transcribe, stop_transcribe, progress_text],
).then(
    fn=transcribe,
    inputs=[session_id],
    outputs=[start_transcribe, stop_transcribe, progress_text],
)

# Stop: remove the session's transcription flag and restore the buttons.
stop_transcribe.click(
    fn=stop_transcribe_ui,
    inputs=[session_id],
    outputs=[start_transcribe, stop_transcribe, progress_text],
)
388
+
389
# --- Active sessions ---
with gr.Accordion("πŸ“Š Active Sessions", open=False):
    # Read-only table refreshed by the 3 s timer below.
    sessions_table = gr.DataFrame(
        headers=["session_id", "file", "start_time", "status"],
        interactive=False,
        wrap=True,
        max_height=200,
    )

# Poll server-side state: session list every 3 s, this session's progress every 1 s.
gr.Timer(3.0).tick(fn=get_active_sessions, outputs=sessions_table)
gr.Timer(1.0).tick(fn=get_session_progress, inputs=[session_id], outputs=[progress_bar, progress_text])
400
+
401
# --------------------------------------------------------
# CSS
# --------------------------------------------------------
# Center the source column's contents vertically and pad its rows.
custom_css = """
#column_source {
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    gap: 1rem;
    margin-top: auto;
    margin-bottom: auto;
}
#column_source .gr-row {
    padding-top: 12px;
    padding-bottom: 12px;
}
"""
# NOTE(review): css is assigned after Blocks construction here β€” confirm
# Gradio honors this; the documented path is gr.Blocks(css=...).
demo.css = custom_css
420
+
421
+
422
# --------------------------------------------------------
# MAIN
# --------------------------------------------------------
if __name__ == "__main__":
    # Queue caps concurrent requests at 20 and keeps the API surface hidden.
    app = demo.queue(max_size=20, api_open=False)
    app.launch(show_api=False, debug=True)