Spaces:

Archime
/

canary_aed_streaming

Running on Zero

App Files Files Community

Archime committed on Nov 7

Commit

4c19345

1 Parent(s): f482080

add start and stop task

Browse files

Files changed (4) hide show

app.py +52 -37
app/session_utils.py +100 -76
app/stream_utils.py +83 -5
app/ui_utils.py +1 -1

app.py CHANGED Viewed

@@ -18,9 +18,11 @@ from app.utils import (
 from app.session_utils import (
     on_load,
     on_unload,
-    get_active_sessions,
-    register_session,
-    reset_all_active_sessions,
 )
 from app.ui_utils import (
@@ -37,27 +39,28 @@ from app.ui_utils import (
 from app.stream_utils import (
     generate_coturn_config,
     read_and_stream_audio,
-    stop_streaming
 )
 # --------------------------------------------------------
 # Initialization
 # --------------------------------------------------------
-reset_all_active_sessions()
 theme,css_style = get_custom_theme()
 with gr.Blocks(theme=theme, css=css_style) as demo:
     session_hash = gr.State()
     session_hash_box = gr.Textbox(label="Session ID", interactive=False, visible=DEBUG)
-    with gr.Accordion("📊 Active Sessions", open=True ,visible=DEBUG):
         sessions_table = gr.DataFrame(
             headers=["session_hash", "file", "start_time", "status"],
             interactive=False,
             wrap=True,
             max_height=200,
         )
-        gr.Timer(3.0).tick(fn=get_active_sessions, outputs=sessions_table)
     demo.load(fn=on_load, inputs=None, outputs=[session_hash, session_hash_box])
     demo.unload(on_unload)
@@ -226,17 +229,17 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
                         interactive=False,
                         visible=False
                     )
                     transcription_output = gr.Textbox(
                         label="Transcription / Translation Result",
-                        placeholder="The output text will appear here...",
                         lines=10,
                         interactive=False,
                         visible=True
                     )
                     start_task_button = gr.Button("Start Task", visible=True)
-                    stop_stream_button = gr.Button("Stop Streaming", visible=False)
                     stop_task_button = gr.Button("Stop Task", visible=False)
                     stop_stream_button.click(
@@ -245,14 +248,50 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
                         outputs=[stop_streaming_flags],
                     )
-                    def stop_task_fn():
-                        return "Task stopped by user."
                     stop_task_button.click(
                         fn=stop_task_fn,
-                        inputs=None,
                         outputs=transcription_output
                     )
                     ui_components = [
                         start_stream_button, stop_stream_button,
@@ -267,30 +306,6 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
                         concurrency_limit=10,
                     )
-                    # def start_transcription(
-                    #     session_hash, stop_streaming_flags,
-                    #     task_type, lang_source, lang_target,
-                    #     chunk_secs, left_context_secs, right_context_secs,
-                    #     streaming_policy, alignatt_thr, waitk_lagging,
-                    #     exclude_sink_frames, xatt_scores_layer, hallucinations_detector
-                    # ):
-                    #     if task_type == "Translation":
-                    #         return f"Translation completed ({lang_source} → {lang_target})\n\nTranslated text:\nLorem ipsum..."
-                    #     else:
-                    #         return f"Transcription completed ({lang_source})\n\nTranscribed text:\nHello everyone, this is a test audio stream..."
-                    # start_task_button.click(
-                    #     fn=start_transcription,
-                    #     inputs=[
-                    #         session_hash, stop_streaming_flags,
-                    #         task_type, lang_source, lang_target,
-                    #         chunk_secs, left_context_secs, right_context_secs,
-                    #         streaming_policy, alignatt_thr, waitk_lagging,
-                    #         exclude_sink_frames, xatt_scores_layer, hallucinations_detector
-                    #     ],
-                    #     outputs=transcription_output
-                    # )
                     # def toggle_task_buttons():
                     #     return (
                     #         gr.update(visible=False),
@@ -307,4 +322,4 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
 if __name__ == "__main__":
-    demo.queue(max_size=10, api_open=False).launch(show_api=False,show_error=True, debug=True)

 from app.session_utils import (
     on_load,
     on_unload,
+    get_active_sessions_hash,
+    register_session_hash,
+    reset_all_active_sessions_hash,
+    get_active_task_flag_file,
 )
 from app.ui_utils import (
 from app.stream_utils import (
     generate_coturn_config,
     read_and_stream_audio,
+    stop_streaming,
+    task
 )
 # --------------------------------------------------------
 # Initialization
 # --------------------------------------------------------
+reset_all_active_sessions_hash()
 theme,css_style = get_custom_theme()
 with gr.Blocks(theme=theme, css=css_style) as demo:
     session_hash = gr.State()
     session_hash_box = gr.Textbox(label="Session ID", interactive=False, visible=DEBUG)
+    with gr.Accordion("📊 Active Sessions Hash", open=True ,visible=DEBUG):
         sessions_table = gr.DataFrame(
             headers=["session_hash", "file", "start_time", "status"],
             interactive=False,
             wrap=True,
             max_height=200,
         )
+        gr.Timer(3.0).tick(fn=get_active_sessions_hash, outputs=sessions_table)
     demo.load(fn=on_load, inputs=None, outputs=[session_hash, session_hash_box])
     demo.unload(on_unload)
                         interactive=False,
                         visible=False
                     )
+                    stop_stream_button = gr.Button("Stop Streaming", visible=False)
                     transcription_output = gr.Textbox(
                         label="Transcription / Translation Result",
+                        placeholder="Waiting for output...",
                         lines=10,
                         interactive=False,
                         visible=True
                     )
                     start_task_button = gr.Button("Start Task", visible=True)
                     stop_task_button = gr.Button("Stop Task", visible=False)
                     stop_stream_button.click(
                         outputs=[stop_streaming_flags],
                     )
+                    def stop_task_fn(session_hash):
+                        """Stop the running task for this session.
+                        Deletes the session's active-task flag file (the file that
+                        `task()` polls in its loop) and yields the message shown in
+                        the transcription output box.
+                        """
+                        transcribe_active = get_active_task_flag_file(session_hash)
+                        # `task()` removes this same flag file when it exits, so the file can
+                        # vanish between a check and the delete. Attempt the delete directly
+                        # and treat "already gone" as success instead of raising.
+                        try:
+                            os.remove(transcribe_active)
+                        except FileNotFoundError:
+                            pass
+                        yield "Task stopped by user."
                     stop_task_button.click(
                         fn=stop_task_fn,
+                        inputs=session_hash,
                         outputs=transcription_output
                     )
+                        # task(session_hash)
+                    def start_transcription(
+                        session_hash, stop_streaming_flags,
+                        task_type, lang_source, lang_target,
+                        chunk_secs, left_context_secs, right_context_secs,
+                        streaming_policy, alignatt_thr, waitk_lagging,
+                        exclude_sink_frames, xatt_scores_layer, hallucinations_detector
+                    ):
+                        """Stream transcription or translation results in real time.
+                        Generator handler for the "Start Task" button. Every yield is a
+                        3-tuple: (text for the output box, update for the start button,
+                        update for the stop button). While the task runs the start button
+                        is hidden and the stop button is shown; the last yield swaps them
+                        back. Only `session_hash` and `task_type` are read here; the other
+                        inputs are accepted but not used by this function.
+                        """
+                        accumulated = ""
+                        yield f"Starting {task_type.lower()}...\n\n",gr.update(visible=False),gr.update(visible=True)
+                        try:
+                            # Loop over the `task()` generator, appending each message to the output.
+                            for msg in task(session_hash):
+                                accumulated += msg
+                                yield accumulated,gr.update(visible=False),gr.update(visible=True)
+                        except Exception as e:
+                            # Without this branch a failure inside `task()` would skip the final
+                            # yield and leave the UI stuck with Start hidden and Stop visible.
+                            yield accumulated + f"\nError: {e}",gr.update(visible=True),gr.update(visible=False)
+                            return
+                        yield accumulated + "\nDone.",gr.update(visible=True),gr.update(visible=False)
+                    start_task_button.click(
+                        fn=start_transcription,
+                        inputs=[
+                            session_hash, stop_streaming_flags,
+                            task_type, lang_source, lang_target,
+                            chunk_secs, left_context_secs, right_context_secs,
+                            streaming_policy, alignatt_thr, waitk_lagging,
+                            exclude_sink_frames, xatt_scores_layer, hallucinations_detector
+                        ],
+                        outputs=[transcription_output,start_task_button,stop_task_button]
+                    )
                     ui_components = [
                         start_stream_button, stop_stream_button,
                         concurrency_limit=10,
                     )
                     # def toggle_task_buttons():
                     #     return (
                     #         gr.update(visible=False),
 if __name__ == "__main__":
+    demo.queue(max_size=10, api_open=False).launch(show_api=False,show_error=True, debug=DEBUG)

app/session_utils.py CHANGED Viewed

@@ -8,25 +8,27 @@ import gradio as gr
 # TMP_DIR = "/tmp/canary_aed_streaming"
 TMP_DIR = os.getenv("TMP_DIR", "/tmp/canary_aed_streaming")
-ACTIVE_SESSIONS_FILE = os.path.join(TMP_DIR, "active_sessions.json")
 # ---------------------------
 # Helper to manage the JSON
 # ---------------------------
-def _read_sessions():
-    if not os.path.exists(ACTIVE_SESSIONS_FILE):
         return {}
     try:
-        with open(ACTIVE_SESSIONS_FILE, "r") as f:
             return json.load(f)
     except Exception:
         return {}
-def _write_sessions(data):
-    os.makedirs(os.path.dirname(ACTIVE_SESSIONS_FILE), exist_ok=True)
-    with open(ACTIVE_SESSIONS_FILE, "w") as f:
         json.dump(data, f, indent=2)
@@ -35,8 +37,8 @@ def _write_sessions(data):
 # ---------------------------
 def on_load(request: gr.Request):
     """Called when a new visitor opens the app."""
-    session_hash = request.session_hash  # ✅ Directly use session_hash as unique ID
-    sessions = _read_sessions()
     sessions[session_hash] = {
         "session_hash": session_hash,
@@ -45,8 +47,8 @@ def on_load(request: gr.Request):
         "status": "active",
     }
-    _write_sessions(sessions)
-    logging.info(f"[{session_hash}] Session registered (on_load).")
     return session_hash, session_hash  # can be used as gr.State + display
@@ -57,16 +59,16 @@ def on_load(request: gr.Request):
 def on_unload(request: gr.Request):
     """Called when the visitor closes or refreshes the app."""
     sid = request.session_hash
-    sessions = _read_sessions()
     if sid in sessions:
         sessions.pop(sid)
-        _write_sessions(sessions)
-        remove_session_data(sid)
-        unregister_session(sid)
-        logging.info(f"[{sid}] Session removed (on_unload).")
     else:
-        logging.info(f"[{sid}] No active session found to remove.")
 def ensure_tmp_dir():
     """Ensures the base temporary directory exists."""
@@ -76,47 +78,66 @@ def ensure_tmp_dir():
         logging.error(f"Failed to create tmp directory {TMP_DIR}: {e}")
-def reset_all_active_sessions():
-    """Removes all temporary session files and folders at startup."""
     ensure_tmp_dir()
     try:
-        # --- Remove active sessions file ---
-        if os.path.exists(ACTIVE_SESSIONS_FILE):
-            os.remove(ACTIVE_SESSIONS_FILE)
-            logging.info("Active sessions file reset at startup.")
         else:
-            logging.debug("No active sessions file found to reset.")
     except Exception as e:
-        logging.error(f"Error resetting active sessions: {e}")
-def remove_session_data(session_id: str):
-    """Removes all temporary files and data related to a specific session."""
-    if not session_id:
-        logging.warning("reset_session() called without a valid session_id.")
         return
     try:
-        # --- Remove session from active_sessions.json ---
-        if os.path.exists(ACTIVE_SESSIONS_FILE):
             try:
-                with open(ACTIVE_SESSIONS_FILE, "r") as f:
                     data = json.load(f)
-                if session_id in data:
-                    data.pop(session_id)
-                    with open(ACTIVE_SESSIONS_FILE, "w") as f:
                         json.dump(data, f, indent=2)
-                    logging.debug(f"[{session_id}] Removed from active_sessions.json.")
             except Exception as e:
-                logging.warning(f"[{session_id}] Failed to update active_sessions.json: {e}")
-        # --- Define all possible session file patterns ---
         files_to_remove = [
-            f"progress_{session_id}.json",
-            # f"stream_stop_flag_{session_id}.txt",
-            f"transcribe_stop_flag_{session_id}.txt",
-            f"transcribe_active_{session_id}.txt",
         ]
         # --- Remove all temporary files ---
@@ -125,78 +146,79 @@ def remove_session_data(session_id: str):
             if os.path.exists(path):
                 try:
                     os.remove(path)
-                    logging.debug(f"[{session_id}] Removed file: {fname}")
                 except Exception as e:
-                    logging.warning(f"[{session_id}] Failed to remove file {fname}: {e}")
         # --- Remove chunk folder if exists ---
-        chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_id}")
         if os.path.isdir(chunk_dir):
             try:
                 shutil.rmtree(chunk_dir)
-                logging.debug(f"[{session_id}] Removed chunk folder: chunks_{session_id}")
             except Exception as e:
-                logging.warning(f"[{session_id}] Failed to remove chunk folder: {e}")
-        logging.info(f"[{session_id}] Session fully reset.")
     except Exception as e:
-        logging.error(f"[{session_id}] Error during reset_session: {e}")
 def generate_session_id() -> str:
-    """Generates a unique session ID."""
     sid = str(uuid.uuid4())
-    logging.debug(f"[{sid}] New session created.")
     return sid
-def register_session(session_id: str, filepath: str):
-    """Registers a new session."""
     ensure_tmp_dir()
     data = {}
-    if os.path.exists(ACTIVE_SESSIONS_FILE):
-        with open(ACTIVE_SESSIONS_FILE, "r") as f:
             try:
                 data = json.load(f)
             except Exception:
                 data = {}
-    data[session_id] = {
-        "session_hash": session_id,
         "file": filepath,
         "start_time": datetime.utcnow().strftime("%H:%M:%S"),
         "status": "active",
     }
-    with open(ACTIVE_SESSIONS_FILE, "w") as f:
         json.dump(data, f)
-    logging.debug(f"[{session_id}] Session registered in active_sessions.json.")
-def unregister_session(session_id: str):
-    """Removes a session from the registry."""
-    if not os.path.exists(ACTIVE_SESSIONS_FILE):
         return
     try:
-        with open(ACTIVE_SESSIONS_FILE, "r") as f:
             data = json.load(f)
-        if session_id in data:
-            data.pop(session_id)
-            with open(ACTIVE_SESSIONS_FILE, "w") as f:
                 json.dump(data, f)
-            logging.debug(f"[{session_id}] Session unregistered.")
     except Exception as e:
-        logging.error(f"[{session_id}] Error unregistering session: {e}")
-def get_active_sessions():
-    """Returns active sessions as a list of rows for the DataFrame."""
-    if not os.path.exists(ACTIVE_SESSIONS_FILE):
         return []
     try:
-        with open(ACTIVE_SESSIONS_FILE, "r") as f:
             data = json.load(f)
         rows = [
@@ -210,11 +232,13 @@ def get_active_sessions():
         ]
         return rows
     except Exception as e:
-        logging.error(f"Error reading active sessions: {e}")
         return []

 # TMP_DIR = "/tmp/canary_aed_streaming"
 TMP_DIR = os.getenv("TMP_DIR", "/tmp/canary_aed_streaming")
+ACTIVE_SESSIONS_HASH_FILE = os.path.join(TMP_DIR, "active_sessions_hash.json")
+ACTIVE_TASK_FLAG="task_active_"
+NAME_FOLDER_CHUNKS="chunks_"
 # ---------------------------
 # Helper to manage the JSON
 # ---------------------------
+def _read_sessions_hash():
+    if not os.path.exists(ACTIVE_SESSIONS_HASH_FILE):
         return {}
     try:
+        with open(ACTIVE_SESSIONS_HASH_FILE, "r") as f:
             return json.load(f)
     except Exception:
         return {}
+def _write_sessions_hash(data):
+    os.makedirs(os.path.dirname(ACTIVE_SESSIONS_HASH_FILE), exist_ok=True)
+    with open(ACTIVE_SESSIONS_HASH_FILE, "w") as f:
         json.dump(data, f, indent=2)
 # ---------------------------
 def on_load(request: gr.Request):
     """Called when a new visitor opens the app."""
+    session_hash = request.session_hash
+    sessions = _read_sessions_hash()
     sessions[session_hash] = {
         "session_hash": session_hash,
         "status": "active",
     }
+    _write_sessions_hash(sessions)
+    logging.info(f"[{session_hash}] session_hash registered (on_load).")
     return session_hash, session_hash  # can be used as gr.State + display
 def on_unload(request: gr.Request):
     """Called when the visitor closes or refreshes the app."""
     sid = request.session_hash
+    sessions = _read_sessions_hash()
     if sid in sessions:
         sessions.pop(sid)
+        _write_sessions_hash(sessions)
+        remove_session_hash_data(sid)
+        unregister_session_hash_hash(sid)
+        logging.info(f"[{sid}] session_hash removed (on_unload).")
     else:
+        logging.info(f"[{sid}] No active session_hash found to remove.")
 def ensure_tmp_dir():
     """Ensures the base temporary directory exists."""
         logging.error(f"Failed to create tmp directory {TMP_DIR}: {e}")
+def reset_all_active_sessions_hash():
+    """Removes all temporary session_hash files and folders at startup."""
     ensure_tmp_dir()
     try:
+        # --- Remove active session_hashs file ---
+        if os.path.exists(ACTIVE_SESSIONS_HASH_FILE):
+            os.remove(ACTIVE_SESSIONS_HASH_FILE)
+            logging.info("Active session_hashs file reset at startup.")
         else:
+            logging.debug("No active session_hashs file found to reset.")
+        # --- Clean all flag files (stream + transcribe) ---
+        for f in os.listdir(TMP_DIR):
+            if (
+                f.startswith(f"{ACTIVE_TASK_FLAG}")
+            ) and f.endswith(".txt"):
+                path = os.path.join(TMP_DIR, f)
+                try:
+                    os.remove(path)
+                    logging.debug(f"Removed leftover flag file: {f}")
+                except Exception as e:
+                    logging.warning(f"Failed to remove flag file {f}: {e}")
+        # --- Clean chunk directories ---
+        for name in os.listdir(TMP_DIR):
+            path = os.path.join(TMP_DIR, name)
+            if os.path.isdir(path) and name.startswith(f"{NAME_FOLDER_CHUNKS}"):
+                try:
+                    shutil.rmtree(path)
+                    logging.debug(f"Removed leftover chunk folder: {name}")
+                except Exception as e:
+                    logging.warning(f"Failed to remove chunk folder {name}: {e}")
+        logging.info("Temporary session cleanup completed successfully.")
     except Exception as e:
+        logging.error(f"Error resetting active session_hashs: {e}")
+def remove_session_hash_data(session_hash: str):
+    """Removes all temporary files and data related to a specific session_hash."""
+    if not session_hash:
+        logging.warning("reset_session() called without a valid session_hash.")
         return
     try:
+        # --- Remove session_hash from active_sessions.json ---
+        if os.path.exists(ACTIVE_SESSIONS_HASH_FILE):
             try:
+                with open(ACTIVE_SESSIONS_HASH_FILE, "r") as f:
                     data = json.load(f)
+                if session_hash in data:
+                    data.pop(session_hash)
+                    with open(ACTIVE_SESSIONS_HASH_FILE, "w") as f:
                         json.dump(data, f, indent=2)
+                    logging.debug(f"[{session_hash}] Removed from {ACTIVE_SESSIONS_HASH_FILE}.")
             except Exception as e:
+                logging.warning(f"[{session_hash}] Failed to update {ACTIVE_SESSIONS_HASH_FILE}: {e}")
+        # --- Define all possible session_hash file patterns ---
         files_to_remove = [
+            get_active_task_flag_file(session_hash),
         ]
         # --- Remove all temporary files ---
             if os.path.exists(path):
                 try:
                     os.remove(path)
+                    logging.debug(f"[{session_hash}] Removed file: {fname}")
                 except Exception as e:
+                    logging.warning(f"[{session_hash}] Failed to remove file {fname}: {e}")
         # --- Remove chunk folder if exists ---
+        chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_hash}")
         if os.path.isdir(chunk_dir):
             try:
                 shutil.rmtree(chunk_dir)
+                logging.debug(f"[{session_hash}] Removed chunk folder: chunks_{session_hash}")
             except Exception as e:
+                logging.warning(f"[{session_hash}] Failed to remove chunk folder: {e}")
+        logging.info(f"[{session_hash}] session_hash fully reset.")
     except Exception as e:
+        logging.error(f"[{session_hash}] Error during reset_session: {e}")
 def generate_session_id() -> str:
+    """Generates a unique session_hash ID."""
     sid = str(uuid.uuid4())
+    logging.debug(f"[{sid}] New session_hash created.")
     return sid
+def register_session_hash(session_hash: str, filepath: str):
+    """Registers a new session_hash."""
     ensure_tmp_dir()
     data = {}
+    if os.path.exists(ACTIVE_SESSIONS_HASH_FILE):
+        with open(ACTIVE_SESSIONS_HASH_FILE, "r") as f:
             try:
                 data = json.load(f)
             except Exception:
                 data = {}
+    data[session_hash] = {
+        "session_hash": session_hash,
         "file": filepath,
         "start_time": datetime.utcnow().strftime("%H:%M:%S"),
         "status": "active",
     }
+    with open(ACTIVE_SESSIONS_HASH_FILE, "w") as f:
         json.dump(data, f)
+    logging.debug(f"[{session_hash}] session_hash registered in active_sessions.json.")
+def unregister_session_hash_hash(session_hash: str):
+    """Removes a session_hash from the registry."""
+    if not os.path.exists(ACTIVE_SESSIONS_HASH_FILE):
         return
     try:
+        with open(ACTIVE_SESSIONS_HASH_FILE, "r") as f:
             data = json.load(f)
+        if session_hash in data:
+            data.pop(session_hash)
+            with open(ACTIVE_SESSIONS_HASH_FILE, "w") as f:
                 json.dump(data, f)
+            logging.debug(f"[{session_hash}] session_hash unregistered.")
     except Exception as e:
+        logging.error(f"[{session_hash}] Error unregistering session_hash: {e}")
+def get_active_sessions_hash():
+    """Returns active session_hashs as a list of rows for the DataFrame."""
+    if not os.path.exists(ACTIVE_SESSIONS_HASH_FILE):
         return []
     try:
+        with open(ACTIVE_SESSIONS_HASH_FILE, "r") as f:
             data = json.load(f)
         rows = [
         ]
         return rows
     except Exception as e:
+        logging.error(f"Error reading active session_hashs: {e}")
         return []
+def get_active_task_flag_file(session_hash: str):
+    """Return the path of the active-task flag file for `session_hash` inside TMP_DIR."""
+    flag_name = f"{ACTIVE_TASK_FLAG}{session_hash}.txt"
+    return os.path.join(TMP_DIR, flag_name)
+def get_folder_chunks(session_hash: str):
+    """Return the path of the chunk folder for `session_hash` inside TMP_DIR."""
+    folder_name = f"{NAME_FOLDER_CHUNKS}{session_hash}"
+    return os.path.join(TMP_DIR, folder_name)

app/stream_utils.py CHANGED Viewed

@@ -14,14 +14,15 @@ import os
 import time
 import random
 # --------------------------------------------------------
 # Utility functions
 # --------------------------------------------------------
 def generate_coturn_config():
     """
     Génère une configuration Coturn complète avec authentification dynamique (use-auth-secret).
@@ -69,7 +70,7 @@ def read_and_stream_audio(filepath_to_stream: str, session_id: str, stop_streami
     if not filepath_to_stream or not os.path.exists(filepath_to_stream):
         yield from handle_stream_error(session_id, f"Audio file not found: {filepath_to_stream}", stop_streaming_flags)
         return
     try:
         segment = AudioSegment.from_file(filepath_to_stream)
         chunk_duration_ms = 1000
@@ -91,6 +92,16 @@ def read_and_stream_audio(filepath_to_stream: str, session_id: str, stop_streami
             logging.debug(f"[{session_id}] Sent chunk {i+1}/{total_chunks} ({progress}%).")
             time.sleep(1)
             # raise_function()  # Optional injected test exception
         logging.info(f"[{session_id}] Audio streaming completed successfully.")
@@ -138,6 +149,73 @@ def _is_stop_requested(stop_streaming_flags: dict) -> bool:
 def stop_streaming(session_id: str, stop_streaming_flags: dict):
     """Trigger the stop flag for active streaming."""
     logging.info(f"[{session_id}] Stop button clicked — sending stop signal.")

 import time
 import random
+from app.session_utils import (
+    get_active_task_flag_file,
+    get_stop_task_flag_file,
+    get_folder_chunks
+)
 # --------------------------------------------------------
 # Utility functions
 # --------------------------------------------------------
 def generate_coturn_config():
     """
     Génère une configuration Coturn complète avec authentification dynamique (use-auth-secret).
     if not filepath_to_stream or not os.path.exists(filepath_to_stream):
         yield from handle_stream_error(session_id, f"Audio file not found: {filepath_to_stream}", stop_streaming_flags)
         return
+    transcribe_flag = get_active_task_flag_file(session_id)
     try:
         segment = AudioSegment.from_file(filepath_to_stream)
         chunk_duration_ms = 1000
             logging.debug(f"[{session_id}] Sent chunk {i+1}/{total_chunks} ({progress}%).")
             time.sleep(1)
+            #  Save only if transcription is active
+            if os.path.exists(transcribe_flag) :
+                chunk_dir = get_folder_chunks(session_id)
+                if not os.path.exists(chunk_dir) :
+                    os.makedirs(chunk_dir, exist_ok=True)
+                npz_path = os.path.join(chunk_dir, f"chunk_{i:05d}.npz")
+                chunk_array = np.array(chunk.get_array_of_samples(), dtype=np.int16)
+                np.savez_compressed(npz_path, data=chunk_array, rate=frame_rate)
+                logging.debug(f"[{session_id}] Saved chunk {i}/{total_chunks} (transcribe active)")
             # raise_function()  # Optional injected test exception
         logging.info(f"[{session_id}] Audio streaming completed successfully.")
+def task(session_id: str):
+    """Continuously read and delete .npz chunks while task is active.
+    Generator. Creates the session's active-task flag file, then polls the
+    session chunk folder every 0.25 s for as long as that flag file exists,
+    yielding one status line per processed chunk and deleting each chunk file
+    after it has been read. On exit it removes the flag file and the chunk
+    folder (only if empty), then yields a final status line.
+    """
+    active_flag = get_active_task_flag_file(session_id)
+    with open(active_flag, "w") as f:
+        f.write("1")
+    chunk_dir = get_folder_chunks(session_id)
+    logging.info(f"[{session_id}] task started. {chunk_dir}")
+    cleanup_error = None
+    waiting_notified = False
+    try:
+        logging.info(f"[{session_id}] task loop started.")
+        yield f"Task started for session {session_id}\n\n"
+        while os.path.exists(active_flag):
+            if not os.path.exists(chunk_dir):
+                # Report the missing folder once per waiting period rather than on
+                # every 0.25 s poll, otherwise the consumer's output fills with
+                # repeated "waiting" lines.
+                if not waiting_notified:
+                    logging.warning(f"[{session_id}] No chunk directory found for task.")
+                    yield "No audio chunks yet... waiting for stream.\n"
+                    waiting_notified = True
+                time.sleep(0.25)
+                continue
+            waiting_notified = False
+            files = sorted(name for name in os.listdir(chunk_dir) if name.endswith(".npz"))
+            if not files:
+                time.sleep(0.25)
+                continue
+            for fname in files:
+                fpath = os.path.join(chunk_dir, fname)
+                try:
+                    # np.load on an .npz returns an NpzFile that keeps the archive
+                    # open; close it before deleting the file.
+                    with np.load(fpath) as npz:
+                        samples = npz["data"]
+                        rate = int(npz["rate"])
+                    text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz"
+                    yield f"{text}\n"
+                    logging.debug(f"[{session_id}] {text}")
+                    os.remove(fpath)
+                    logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
+                except Exception as e:
+                    logging.error(f"[{session_id}] Error processing {fname}: {e}")
+                    yield f"Error processing {fname}: {e}\n"
+                    continue
+            time.sleep(0.25)
+        yield "\nTask stopped by user or stream ended.\n"
+        logging.info(f"[{session_id}] task loop ended (flag removed).")
+    except Exception as e:
+        logging.error(f"[{session_id}] task error: {e}", exc_info=True)
+        yield f"Unexpected error: {e}\n"
+    finally:
+        # No `yield` in this block: if the consumer closes the generator,
+        # GeneratorExit is raised at a yield above, and yielding again during
+        # cleanup would raise "RuntimeError: generator ignored GeneratorExit".
+        try:
+            # The stop handler may delete the flag at the same moment.
+            os.remove(active_flag)
+        except FileNotFoundError:
+            pass
+        logging.info(f"[{session_id}] task stopped.")
+        try:
+            if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
+                os.rmdir(chunk_dir)
+                logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
+        except Exception as e:
+            logging.error(f"[{session_id}] Cleanup error: {e}")
+            cleanup_error = e
+        logging.info(f"[{session_id}] Exiting task loop.")
+    # Reached only on a normal finish (not when the generator was closed early).
+    if cleanup_error is not None:
+        yield f"\nCleanup error: {cleanup_error}"
+    yield "\nTask finished and cleaned up.\n"
 def stop_streaming(session_id: str, stop_streaming_flags: dict):
     """Trigger the stop flag for active streaming."""
     logging.info(f"[{session_id}] Stop button clicked — sending stop signal.")

app/ui_utils.py CHANGED Viewed

@@ -131,7 +131,7 @@ def handle_additional_outputs(webrtc_stream, msg):
     Update UI elements based on streaming progress or errors.
     Controls button states, audio visibility, and progress slider.
     """
-    logging.debug(f"Additional output received: {msg}")
     # ui_components = [start_stream_button, stop_stream_button,go_to_task, audio_source_step, status_slider,walkthrough]
     progress = float(0)

     Update UI elements based on streaming progress or errors.
     Controls button states, audio visibility, and progress slider.
     """
+    # logging.debug(f"Additional output received: {msg}")
     # ui_components = [start_stream_button, stop_stream_button,go_to_task, audio_source_step, status_slider,walkthrough]
     progress = float(0)