Archime committed on
Commit
b6a38da
·
1 Parent(s): d296c7b

ERROR HANDLING GPU ABORT Hugging Face

Browse files
Files changed (1) hide show
  1. app/ui_utils.py +67 -49
app/ui_utils.py CHANGED
@@ -397,63 +397,81 @@ def start_task_asr_ast(
397
  streaming_policy, alignatt_thr, waitk_lagging,
398
  exclude_sink_frames, xatt_scores_layer, hallucinations_detector
399
  )
400
-
401
  # Loop over partial results
402
  # result_data: can be transcribed text OR an info message depending on status
403
- for result_data, status, debug_info in task_generator:
404
-
405
- # 1. Default states for this iteration ('In Progress' mode)
406
- # By default, lock config and allow stopping
407
- start_btn = gr.update(visible=False)
408
- stop_btn = gr.update(visible=True)
409
- config_step = gr.update(interactive=False)
410
-
411
- # Status message and main text depend on return type
412
- status_msg = gr.update(visible=True)
413
- main_output = accumulated_text
414
-
415
- # --- CASE 1: SUCCESS (New text segment) ---
416
- if status == "success":
417
- # result_data is the new text chunk here
418
- partial_text = result_data
419
-
420
- # Update accumulator
421
- accumulated_text += partial_text
422
- main_output = accumulated_text
423
 
424
- # Status message displays chunk info (e.g., timestamps)
425
- status_msg = gr.update(visible=True, value=debug_info, elem_classes=[status])
426
-
427
- # --- CASE 2: WARNING / INFO (System message) ---
428
- elif status in ["warning", "info"]:
429
- # result_data is the error or info message here
430
- # Do not touch accumulated_text
431
- status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
432
-
433
-
434
- # --- CASE 3: DONE / ERROR---
435
- elif status in ["done", "error"]:
436
- logging.error(f"[ui] error ")
437
- # Re-enable controls
438
- is_streaming = is_active_stream(session_hash_code)
439
- start_btn = gr.update(visible=is_streaming) # Show Start only if audio stream is active
440
- stop_btn = gr.update(visible=False)
441
- config_step = gr.update(interactive=True)
442
 
443
- # result_data is the completion message
444
- status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
 
445
 
446
- # 2. Single dispatch to UI
447
- # Expected order: [task_output, status_message_task, start_task_button, stop_task_button, config_step]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  yield (
449
- main_output,
450
- status_msg,
451
- start_btn,
452
- stop_btn,
453
- config_step
454
  )
455
 
456
-
457
  def stop_task_fn(session_hash_code):
458
  remove_active_task_flag_file(session_hash_code)
459
  yield "Task stopped by user."
 
397
  streaming_policy, alignatt_thr, waitk_lagging,
398
  exclude_sink_frames, xatt_scores_layer, hallucinations_detector
399
  )
400
+ try:
401
  # Loop over partial results
402
  # result_data: can be transcribed text OR an info message depending on status
403
+ for result_data, status, debug_info in task_generator:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
+ # 1. Default states for this iteration ('In Progress' mode)
406
+ # By default, lock config and allow stopping
407
+ start_btn = gr.update(visible=False)
408
+ stop_btn = gr.update(visible=True)
409
+ config_step = gr.update(interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
+ # Status message and main text depend on return type
412
+ status_msg = gr.update(visible=True)
413
+ main_output = accumulated_text
414
 
415
+ # --- CASE 1: SUCCESS (New text segment) ---
416
+ if status == "success":
417
+ # result_data is the new text chunk here
418
+ partial_text = result_data
419
+
420
+ # Update accumulator
421
+ accumulated_text += partial_text
422
+ main_output = accumulated_text
423
+
424
+ # Status message displays chunk info (e.g., timestamps)
425
+ status_msg = gr.update(visible=True, value=debug_info, elem_classes=[status])
426
+
427
+ # --- CASE 2: WARNING / INFO (System message) ---
428
+ elif status in ["warning", "info"]:
429
+ # result_data is the error or info message here
430
+ # Do not touch accumulated_text
431
+ status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
432
+
433
+
434
+ # --- CASE 3: DONE / ERROR---
435
+ elif status in ["done", "error"]:
436
+ logging.error(f"[ui] error ")
437
+ # Re-enable controls
438
+ is_streaming = is_active_stream(session_hash_code)
439
+ start_btn = gr.update(visible=is_streaming) # Show Start only if audio stream is active
440
+ stop_btn = gr.update(visible=False)
441
+ config_step = gr.update(interactive=True)
442
+
443
+ # result_data is the completion message
444
+ status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
445
+
446
+ # 2. Single dispatch to UI
447
+ # Expected order: [task_output, status_message_task, start_task_button, stop_task_button, config_step]
448
+ yield (
449
+ main_output,
450
+ status_msg,
451
+ start_btn,
452
+ stop_btn,
453
+ config_step
454
+ )
455
+ except Exception as e:
456
+ # --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
457
+ error_msg = str(e)
458
+ logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
459
+
460
+ # Detect specific Hugging Face / GPU errors
461
+ if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
462
+ display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
463
+ else:
464
+ display_msg = f"⚠️ **Task Error:** {error_msg}"
465
+
466
+ # Update UI to reflect the crash
467
  yield (
468
+ accumulated_text, # Keep what we managed to generate so far
469
+ gr.update(visible=True, value=display_msg, elem_classes=["error"]), # Show error in status box
470
+ gr.update(visible=True), # Re-enable Start button to allow retry
471
+ gr.update(visible=False), # Hide Stop button
472
+ gr.update(interactive=True) # Unlock config
473
  )
474
 
 
475
  def stop_task_fn(session_hash_code):
476
  remove_active_task_flag_file(session_hash_code)
477
  yield "Task stopped by user."