Spaces:
Running
on
Zero
Running
on
Zero
ERROR HANDLING GPU ABORT Hugging Face
Browse files
- app/ui_utils.py +67 -49
app/ui_utils.py
CHANGED
|
@@ -397,63 +397,81 @@ def start_task_asr_ast(
|
|
| 397 |
streaming_policy, alignatt_thr, waitk_lagging,
|
| 398 |
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 399 |
)
|
| 400 |
-
|
| 401 |
# Loop over partial results
|
| 402 |
# result_data: can be transcribed text OR an info message depending on status
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
# 1. Default states for this iteration ('In Progress' mode)
|
| 406 |
-
# By default, lock config and allow stopping
|
| 407 |
-
start_btn = gr.update(visible=False)
|
| 408 |
-
stop_btn = gr.update(visible=True)
|
| 409 |
-
config_step = gr.update(interactive=False)
|
| 410 |
-
|
| 411 |
-
# Status message and main text depend on return type
|
| 412 |
-
status_msg = gr.update(visible=True)
|
| 413 |
-
main_output = accumulated_text
|
| 414 |
-
|
| 415 |
-
# --- CASE 1: SUCCESS (New text segment) ---
|
| 416 |
-
if status == "success":
|
| 417 |
-
# result_data is the new text chunk here
|
| 418 |
-
partial_text = result_data
|
| 419 |
-
|
| 420 |
-
# Update accumulator
|
| 421 |
-
accumulated_text += partial_text
|
| 422 |
-
main_output = accumulated_text
|
| 423 |
|
| 424 |
-
#
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
# result_data is the warning or info message here
|
| 430 |
-
# Do not touch accumulated_text
|
| 431 |
-
status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
# --- CASE 3: DONE / ERROR ---
|
| 435 |
-
elif status in ["done", "error"]:
|
| 436 |
-
logging.error(f"[ui] error ")
|
| 437 |
-
# Re-enable controls
|
| 438 |
-
is_streaming = is_active_stream(session_hash_code)
|
| 439 |
-
start_btn = gr.update(visible=is_streaming) # Show Start only if audio stream is active
|
| 440 |
-
stop_btn = gr.update(visible=False)
|
| 441 |
-
config_step = gr.update(interactive=True)
|
| 442 |
|
| 443 |
-
#
|
| 444 |
-
status_msg = gr.update(visible=True
|
|
|
|
| 445 |
|
| 446 |
-
|
| 447 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
yield (
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
)
|
| 455 |
|
| 456 |
-
|
| 457 |
def stop_task_fn(session_hash_code):
|
| 458 |
remove_active_task_flag_file(session_hash_code)
|
| 459 |
yield "Task stopped by user."
|
|
|
|
| 397 |
streaming_policy, alignatt_thr, waitk_lagging,
|
| 398 |
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 399 |
)
|
| 400 |
+
try:
|
| 401 |
# Loop over partial results
|
| 402 |
# result_data: can be transcribed text OR an info message depending on status
|
| 403 |
+
for result_data, status, debug_info in task_generator:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
+
# 1. Default states for this iteration ('In Progress' mode)
|
| 406 |
+
# By default, lock config and allow stopping
|
| 407 |
+
start_btn = gr.update(visible=False)
|
| 408 |
+
stop_btn = gr.update(visible=True)
|
| 409 |
+
config_step = gr.update(interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
+
# Status message and main text depend on return type
|
| 412 |
+
status_msg = gr.update(visible=True)
|
| 413 |
+
main_output = accumulated_text
|
| 414 |
|
| 415 |
+
# --- CASE 1: SUCCESS (New text segment) ---
|
| 416 |
+
if status == "success":
|
| 417 |
+
# result_data is the new text chunk here
|
| 418 |
+
partial_text = result_data
|
| 419 |
+
|
| 420 |
+
# Update accumulator
|
| 421 |
+
accumulated_text += partial_text
|
| 422 |
+
main_output = accumulated_text
|
| 423 |
+
|
| 424 |
+
# Status message displays chunk info (e.g., timestamps)
|
| 425 |
+
status_msg = gr.update(visible=True, value=debug_info, elem_classes=[status])
|
| 426 |
+
|
| 427 |
+
# --- CASE 2: WARNING / INFO (System message) ---
|
| 428 |
+
elif status in ["warning", "info"]:
|
| 429 |
+
# result_data is the warning or info message here
|
| 430 |
+
# Do not touch accumulated_text
|
| 431 |
+
status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
# --- CASE 3: DONE / ERROR ---
|
| 435 |
+
elif status in ["done", "error"]:
|
| 436 |
+
logging.error(f"[ui] error ")
|
| 437 |
+
# Re-enable controls
|
| 438 |
+
is_streaming = is_active_stream(session_hash_code)
|
| 439 |
+
start_btn = gr.update(visible=is_streaming) # Show Start only if audio stream is active
|
| 440 |
+
stop_btn = gr.update(visible=False)
|
| 441 |
+
config_step = gr.update(interactive=True)
|
| 442 |
+
|
| 443 |
+
# result_data is the completion message
|
| 444 |
+
status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
|
| 445 |
+
|
| 446 |
+
# 2. Single dispatch to UI
|
| 447 |
+
# Expected order: [task_output, status_message_task, start_task_button, stop_task_button, config_step]
|
| 448 |
+
yield (
|
| 449 |
+
main_output,
|
| 450 |
+
status_msg,
|
| 451 |
+
start_btn,
|
| 452 |
+
stop_btn,
|
| 453 |
+
config_step
|
| 454 |
+
)
|
| 455 |
+
except Exception as e:
|
| 456 |
+
# --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
|
| 457 |
+
error_msg = str(e)
|
| 458 |
+
logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
|
| 459 |
+
|
| 460 |
+
# Detect specific Hugging Face / GPU errors
|
| 461 |
+
if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
|
| 462 |
+
display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
|
| 463 |
+
else:
|
| 464 |
+
display_msg = f"⚠️ **Task Error:** {error_msg}"
|
| 465 |
+
|
| 466 |
+
# Update UI to reflect the crash
|
| 467 |
yield (
|
| 468 |
+
accumulated_text, # Keep what we managed to generate so far
|
| 469 |
+
gr.update(visible=True, value=display_msg, elem_classes=["error"]), # Show error in status box
|
| 470 |
+
gr.update(visible=True), # Re-enable Start button to allow retry
|
| 471 |
+
gr.update(visible=False), # Hide Stop button
|
| 472 |
+
gr.update(interactive=True) # Unlock config
|
| 473 |
)
|
| 474 |
|
|
|
|
| 475 |
def stop_task_fn(session_hash_code):
|
| 476 |
remove_active_task_flag_file(session_hash_code)
|
| 477 |
yield "Task stopped by user."
|