Spaces:
Running
on
Zero
Running
on
Zero
correctly GPU ABORT
Browse files- app/session_utils.py +14 -22
- app/ui_utils.py +3 -1
- app/utils.py +6 -16
app/session_utils.py
CHANGED
|
@@ -138,29 +138,10 @@ def remove_session_hash_code_data(session_hash_code: str):
|
|
| 138 |
logging.warning(f"[{session_hash_code}] Failed to update {ACTIVE_SESSIONS_HASH_FILE}: {e}")
|
| 139 |
|
| 140 |
# --- Define all possible session_hash_code file patterns ---
|
| 141 |
-
files_to_remove = [
|
| 142 |
-
get_active_task_flag_file(session_hash_code),
|
| 143 |
-
get_active_stream_flag_file(session_hash_code),
|
| 144 |
-
]
|
| 145 |
-
|
| 146 |
# --- Remove all temporary files ---
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
try:
|
| 151 |
-
os.remove(path)
|
| 152 |
-
logging.debug(f"[{session_hash_code}] Removed file: {fname}")
|
| 153 |
-
except Exception as e:
|
| 154 |
-
logging.warning(f"[{session_hash_code}] Failed to remove file {fname}: {e}")
|
| 155 |
-
|
| 156 |
-
# --- Remove chunk folder if exists ---
|
| 157 |
-
chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_hash_code}")
|
| 158 |
-
if os.path.isdir(chunk_dir):
|
| 159 |
-
try:
|
| 160 |
-
shutil.rmtree(chunk_dir)
|
| 161 |
-
logging.debug(f"[{session_hash_code}] Removed chunk folder: chunks_{session_hash_code}")
|
| 162 |
-
except Exception as e:
|
| 163 |
-
logging.warning(f"[{session_hash_code}] Failed to remove chunk folder: {e}")
|
| 164 |
|
| 165 |
logging.info(f"[{session_hash_code}] session_hash_code fully reset.")
|
| 166 |
|
|
@@ -259,5 +240,16 @@ def remove_active_task_flag_file(session_hash_code: str):
|
|
| 259 |
except Exception as e:
|
| 260 |
logging.warning(f"[{session_hash_code}] Failed to remove file {fname}: {e}")
|
| 261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
def get_session_hashe_chunks_dir(session_hash_code: str):
|
| 263 |
return os.path.join(TMP_DIR, f"{NAME_FOLDER_CHUNKS}{session_hash_code}")
|
|
|
|
| 138 |
logging.warning(f"[{session_hash_code}] Failed to update {ACTIVE_SESSIONS_HASH_FILE}: {e}")
|
| 139 |
|
| 140 |
# --- Define all possible session_hash_code file patterns ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
# --- Remove all temporary files ---
|
| 142 |
+
remove_active_task_flag_file(session_hash_code)
|
| 143 |
+
remove_active_stream_flag_file(session_hash_code)
|
| 144 |
+
remove_chunk_folder(session_hash_code)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
logging.info(f"[{session_hash_code}] session_hash_code fully reset.")
|
| 147 |
|
|
|
|
| 240 |
except Exception as e:
|
| 241 |
logging.warning(f"[{session_hash_code}] Failed to remove file {fname}: {e}")
|
| 242 |
|
| 243 |
+
|
| 244 |
+
def remove_chunk_folder(session_hash_code: str) :
|
| 245 |
+
# --- Remove chunk folder if exists ---
|
| 246 |
+
chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_hash_code}")
|
| 247 |
+
if os.path.isdir(chunk_dir):
|
| 248 |
+
try:
|
| 249 |
+
shutil.rmtree(chunk_dir)
|
| 250 |
+
logging.debug(f"[{session_hash_code}] Removed chunk folder: chunks_{session_hash_code}")
|
| 251 |
+
except Exception as e:
|
| 252 |
+
logging.warning(f"[{session_hash_code}] Failed to remove chunk folder: {e}")
|
| 253 |
+
|
| 254 |
def get_session_hashe_chunks_dir(session_hash_code: str):
|
| 255 |
return os.path.join(TMP_DIR, f"{NAME_FOLDER_CHUNKS}{session_hash_code}")
|
app/ui_utils.py
CHANGED
|
@@ -5,6 +5,7 @@ import os
|
|
| 5 |
|
| 6 |
from app.utils import (
|
| 7 |
remove_active_task_flag_file,
|
|
|
|
| 8 |
task_fake,
|
| 9 |
is_active_task,
|
| 10 |
is_active_stream,
|
|
@@ -456,7 +457,8 @@ def start_task_asr_ast(
|
|
| 456 |
# --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
|
| 457 |
error_msg = str(e)
|
| 458 |
logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
|
| 459 |
-
|
|
|
|
| 460 |
# Detect specific Hugging Face / GPU errors
|
| 461 |
if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
|
| 462 |
display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
|
|
|
|
| 5 |
|
| 6 |
from app.utils import (
|
| 7 |
remove_active_task_flag_file,
|
| 8 |
+
remove_chunk_folder,
|
| 9 |
task_fake,
|
| 10 |
is_active_task,
|
| 11 |
is_active_stream,
|
|
|
|
| 457 |
# --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
|
| 458 |
error_msg = str(e)
|
| 459 |
logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
|
| 460 |
+
remove_active_task_flag_file(session_hash_code)
|
| 461 |
+
remove_chunk_folder(session_hash_code)
|
| 462 |
# Detect specific Hugging Face / GPU errors
|
| 463 |
if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
|
| 464 |
display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
|
app/utils.py
CHANGED
|
@@ -20,6 +20,7 @@ from app.session_utils import (
|
|
| 20 |
get_active_stream_flag_file,
|
| 21 |
remove_active_stream_flag_file,
|
| 22 |
remove_active_task_flag_file,
|
|
|
|
| 23 |
get_session_hashe_chunks_dir
|
| 24 |
)
|
| 25 |
from app.supported_languages import (
|
|
@@ -189,7 +190,7 @@ asr_model = None
|
|
| 189 |
# ASR_MODEL = load_model()
|
| 190 |
|
| 191 |
|
| 192 |
-
@spaces.GPU
|
| 193 |
def task_fake(session_hash_code: str,
|
| 194 |
task_type, lang_source, lang_target,
|
| 195 |
chunk_secs, left_context_secs, right_context_secs,
|
|
@@ -256,22 +257,13 @@ def task_fake(session_hash_code: str,
|
|
| 256 |
return
|
| 257 |
|
| 258 |
finally:
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
logging.info(f"[{session_hash_code}] task stopped.")
|
| 262 |
-
|
| 263 |
-
try:
|
| 264 |
-
if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
|
| 265 |
-
os.rmdir(chunk_dir)
|
| 266 |
-
logging.debug(f"[{session_hash_code}] Cleaned up empty chunk dir.")
|
| 267 |
-
except Exception as e:
|
| 268 |
-
logging.error(f"[{session_hash_code}] Cleanup error: {e}")
|
| 269 |
-
yield (f"Cleanup error: {e}", "error", None)
|
| 270 |
-
|
| 271 |
logging.info(f"[{session_hash_code}] Exiting task loop.")
|
| 272 |
# yield ("Task finished and cleaned up.", "done", None)
|
| 273 |
|
| 274 |
|
|
|
|
| 275 |
@spaces.GPU
|
| 276 |
def task(session_hash_code: str,
|
| 277 |
task_type, lang_source, lang_target,
|
|
@@ -356,9 +348,7 @@ def task(session_hash_code: str,
|
|
| 356 |
yield (f"Unexpected error: {e}", "error", None)
|
| 357 |
|
| 358 |
finally:
|
| 359 |
-
|
| 360 |
-
os.remove(active_flag)
|
| 361 |
-
logging.info(f"[{session_hash_code}] task stopped.")
|
| 362 |
|
| 363 |
try:
|
| 364 |
if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
|
|
|
|
| 20 |
get_active_stream_flag_file,
|
| 21 |
remove_active_stream_flag_file,
|
| 22 |
remove_active_task_flag_file,
|
| 23 |
+
remove_chunk_folder,
|
| 24 |
get_session_hashe_chunks_dir
|
| 25 |
)
|
| 26 |
from app.supported_languages import (
|
|
|
|
| 190 |
# ASR_MODEL = load_model()
|
| 191 |
|
| 192 |
|
| 193 |
+
@spaces.GPU(duration=10)
|
| 194 |
def task_fake(session_hash_code: str,
|
| 195 |
task_type, lang_source, lang_target,
|
| 196 |
chunk_secs, left_context_secs, right_context_secs,
|
|
|
|
| 257 |
return
|
| 258 |
|
| 259 |
finally:
|
| 260 |
+
remove_active_task_flag_file(session_hash_code)
|
| 261 |
+
remove_chunk_folder(session_hash_code)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
logging.info(f"[{session_hash_code}] Exiting task loop.")
|
| 263 |
# yield ("Task finished and cleaned up.", "done", None)
|
| 264 |
|
| 265 |
|
| 266 |
+
|
| 267 |
@spaces.GPU
|
| 268 |
def task(session_hash_code: str,
|
| 269 |
task_type, lang_source, lang_target,
|
|
|
|
| 348 |
yield (f"Unexpected error: {e}", "error", None)
|
| 349 |
|
| 350 |
finally:
|
| 351 |
+
remove_active_task_flag_file(session_hash_code)
|
|
|
|
|
|
|
| 352 |
|
| 353 |
try:
|
| 354 |
if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
|