Archime committed on
Commit
0c03412
·
1 Parent(s): b6a38da

correctly GPU ABORT

Browse files
Files changed (3) hide show
  1. app/session_utils.py +14 -22
  2. app/ui_utils.py +3 -1
  3. app/utils.py +6 -16
app/session_utils.py CHANGED
@@ -138,29 +138,10 @@ def remove_session_hash_code_data(session_hash_code: str):
138
  logging.warning(f"[{session_hash_code}] Failed to update {ACTIVE_SESSIONS_HASH_FILE}: {e}")
139
 
140
  # --- Define all possible session_hash_code file patterns ---
141
- files_to_remove = [
142
- get_active_task_flag_file(session_hash_code),
143
- get_active_stream_flag_file(session_hash_code),
144
- ]
145
-
146
  # --- Remove all temporary files ---
147
- for fname in files_to_remove:
148
- path = os.path.join(TMP_DIR, fname)
149
- if os.path.exists(path):
150
- try:
151
- os.remove(path)
152
- logging.debug(f"[{session_hash_code}] Removed file: {fname}")
153
- except Exception as e:
154
- logging.warning(f"[{session_hash_code}] Failed to remove file {fname}: {e}")
155
-
156
- # --- Remove chunk folder if exists ---
157
- chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_hash_code}")
158
- if os.path.isdir(chunk_dir):
159
- try:
160
- shutil.rmtree(chunk_dir)
161
- logging.debug(f"[{session_hash_code}] Removed chunk folder: chunks_{session_hash_code}")
162
- except Exception as e:
163
- logging.warning(f"[{session_hash_code}] Failed to remove chunk folder: {e}")
164
 
165
  logging.info(f"[{session_hash_code}] session_hash_code fully reset.")
166
 
@@ -259,5 +240,16 @@ def remove_active_task_flag_file(session_hash_code: str):
259
  except Exception as e:
260
  logging.warning(f"[{session_hash_code}] Failed to remove file {fname}: {e}")
261
 
 
 
 
 
 
 
 
 
 
 
 
262
  def get_session_hashe_chunks_dir(session_hash_code: str):
263
  return os.path.join(TMP_DIR, f"{NAME_FOLDER_CHUNKS}{session_hash_code}")
 
138
  logging.warning(f"[{session_hash_code}] Failed to update {ACTIVE_SESSIONS_HASH_FILE}: {e}")
139
 
140
  # --- Define all possible session_hash_code file patterns ---
 
 
 
 
 
141
  # --- Remove all temporary files ---
142
+ remove_active_task_flag_file(session_hash_code)
143
+ remove_active_stream_flag_file(session_hash_code)
144
+ remove_chunk_folder(session_hash_code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  logging.info(f"[{session_hash_code}] session_hash_code fully reset.")
147
 
 
240
  except Exception as e:
241
  logging.warning(f"[{session_hash_code}] Failed to remove file {fname}: {e}")
242
 
243
+
244
+ def remove_chunk_folder(session_hash_code: str) :
245
+ # --- Remove chunk folder if exists ---
246
+ chunk_dir = os.path.join(TMP_DIR, f"chunks_{session_hash_code}")
247
+ if os.path.isdir(chunk_dir):
248
+ try:
249
+ shutil.rmtree(chunk_dir)
250
+ logging.debug(f"[{session_hash_code}] Removed chunk folder: chunks_{session_hash_code}")
251
+ except Exception as e:
252
+ logging.warning(f"[{session_hash_code}] Failed to remove chunk folder: {e}")
253
+
254
  def get_session_hashe_chunks_dir(session_hash_code: str):
255
  return os.path.join(TMP_DIR, f"{NAME_FOLDER_CHUNKS}{session_hash_code}")
app/ui_utils.py CHANGED
@@ -5,6 +5,7 @@ import os
5
 
6
  from app.utils import (
7
  remove_active_task_flag_file,
 
8
  task_fake,
9
  is_active_task,
10
  is_active_stream,
@@ -456,7 +457,8 @@ def start_task_asr_ast(
456
  # --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
457
  error_msg = str(e)
458
  logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
459
-
 
460
  # Detect specific Hugging Face / GPU errors
461
  if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
462
  display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
 
5
 
6
  from app.utils import (
7
  remove_active_task_flag_file,
8
+ remove_chunk_folder,
9
  task_fake,
10
  is_active_task,
11
  is_active_stream,
 
457
  # --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
458
  error_msg = str(e)
459
  logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
460
+ remove_active_task_flag_file(session_hash_code)
461
+ remove_chunk_folder(session_hash_code)
462
  # Detect specific Hugging Face / GPU errors
463
  if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
464
  display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
app/utils.py CHANGED
@@ -20,6 +20,7 @@ from app.session_utils import (
20
  get_active_stream_flag_file,
21
  remove_active_stream_flag_file,
22
  remove_active_task_flag_file,
 
23
  get_session_hashe_chunks_dir
24
  )
25
  from app.supported_languages import (
@@ -189,7 +190,7 @@ asr_model = None
189
  # ASR_MODEL = load_model()
190
 
191
 
192
- @spaces.GPU
193
  def task_fake(session_hash_code: str,
194
  task_type, lang_source, lang_target,
195
  chunk_secs, left_context_secs, right_context_secs,
@@ -256,22 +257,13 @@ def task_fake(session_hash_code: str,
256
  return
257
 
258
  finally:
259
- if os.path.exists(active_flag):
260
- os.remove(active_flag)
261
- logging.info(f"[{session_hash_code}] task stopped.")
262
-
263
- try:
264
- if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
265
- os.rmdir(chunk_dir)
266
- logging.debug(f"[{session_hash_code}] Cleaned up empty chunk dir.")
267
- except Exception as e:
268
- logging.error(f"[{session_hash_code}] Cleanup error: {e}")
269
- yield (f"Cleanup error: {e}", "error", None)
270
-
271
  logging.info(f"[{session_hash_code}] Exiting task loop.")
272
  # yield ("Task finished and cleaned up.", "done", None)
273
 
274
 
 
275
  @spaces.GPU
276
  def task(session_hash_code: str,
277
  task_type, lang_source, lang_target,
@@ -356,9 +348,7 @@ def task(session_hash_code: str,
356
  yield (f"Unexpected error: {e}", "error", None)
357
 
358
  finally:
359
- if os.path.exists(active_flag):
360
- os.remove(active_flag)
361
- logging.info(f"[{session_hash_code}] task stopped.")
362
 
363
  try:
364
  if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
 
20
  get_active_stream_flag_file,
21
  remove_active_stream_flag_file,
22
  remove_active_task_flag_file,
23
+ remove_chunk_folder,
24
  get_session_hashe_chunks_dir
25
  )
26
  from app.supported_languages import (
 
190
  # ASR_MODEL = load_model()
191
 
192
 
193
+ @spaces.GPU(duration=10)
194
  def task_fake(session_hash_code: str,
195
  task_type, lang_source, lang_target,
196
  chunk_secs, left_context_secs, right_context_secs,
 
257
  return
258
 
259
  finally:
260
+ remove_active_task_flag_file(session_hash_code)
261
+ remove_chunk_folder(session_hash_code)
 
 
 
 
 
 
 
 
 
 
262
  logging.info(f"[{session_hash_code}] Exiting task loop.")
263
  # yield ("Task finished and cleaned up.", "done", None)
264
 
265
 
266
+
267
  @spaces.GPU
268
  def task(session_hash_code: str,
269
  task_type, lang_source, lang_target,
 
348
  yield (f"Unexpected error: {e}", "error", None)
349
 
350
  finally:
351
+ remove_active_task_flag_file(session_hash_code)
 
 
352
 
353
  try:
354
  if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):