Spaces:

RobotsMali
/

RobotsMali_ASR_DEMO

Running

App Files Community

binaryMao committed on Oct 17

Commit

0dd5255

verified ·

1 Parent(s): 0942055

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -9

app.py CHANGED Viewed

@@ -17,11 +17,12 @@ import nemo.collections.nlp as nemo_nlp
 # ----------------------------------------------------------------------
 # CONSTANTES DE CONFIGURATION
 # ----------------------------------------------------------------------
 ROBOTSMALI_MODELS = [
     "RobotsMali/soloba-ctc-0.6b-v0",
     "RobotsMali/soloni-114m-tdt-ctc-v1",
     "RobotsMali/soloni-114m-tdt-ctc-V0",
-    "RobotsMali/stt-bm-quartznet5x5-V0", # Modèles souvent en erreur (selon les logs), mais inclus.
     "RobotsMali/stt-bm-quartznet5x5-v1",
     "RobotsMali/soloba-ctc-0.6b-v1"
 ]
@@ -85,8 +86,12 @@ def load_punct_model():
 # ----------------------------------------------------------------------
 def transcribe_audio(model_name: str, audio_path: str):
     """
-    Effectue la transcription ASR de l'audio complet avec une barre de progression simulée.
     """
     if audio_path is None:
         yield "⚠️ Veuillez d'abord télécharger ou enregistrer un fichier audio."
         return
@@ -104,7 +109,7 @@ def transcribe_audio(model_name: str, audio_path: str):
         full_audio_data, sr = librosa.load(audio_path, sr=SR_TARGET, mono=True)
         total_duration = len(full_audio_data) / SR_TARGET
-        # Correction de la forme audio (squeeze) pour éviter l'erreur de "Output shape mismatch"
         segment_data = full_audio_data.squeeze()
         sf.write(temp_full_path, segment_data, SR_TARGET)
@@ -121,11 +126,10 @@ def transcribe_audio(model_name: str, audio_path: str):
         yield f"**[3/4] TRANSCRIPTION EN COURS...** Démarrage de l'inférence. ⏳"
         # --- BARRE DE PROGRESSION SIMULÉE ---
-        # Affiche une progression visuelle pendant l'attente de l'inférence GPU
         for progress_percent in range(0, 91, 10):
             time.sleep(0.3)
-            # Utilise gr.Progress pour une barre stylée en haut de l'interface
-            yield gr.Progress(progress_percent, total=100, desc=f"Progression ASR ({progress_percent}%)")
         yield f"**[3/4] FINALISATION...** Inférence en cours sur le GPU. 🚀"
         # ---------------------------------------------
@@ -138,7 +142,6 @@ def transcribe_audio(model_name: str, audio_path: str):
         if transcriptions and transcriptions[0]:
             hyp_object = transcriptions[0]
-            # Gère les différents formats de sortie de NeMo
             if hasattr(hyp_object, 'text'):
                 transcription_text_final = hyp_object.text.strip()
             elif isinstance(hyp_object, str):
@@ -161,7 +164,8 @@ def transcribe_audio(model_name: str, audio_path: str):
         punct_model = load_punct_model()
         if punct_model and transcription_text_final != "[Transcription vide ou échec ASR]":
             yield f"**[4/4] POST-TRAITEMENT...** Correction de la ponctuation et de la casse pour la lisibilité. ✨"
-            yield gr.Progress(100, total=100, desc="Progression ASR (100%)") # Termine la barre
             try:
                 corrected_list = punct_model.add_punctuation_capitalization([transcription_text_final])
@@ -179,7 +183,6 @@ def transcribe_audio(model_name: str, audio_path: str):
         # 2. PRÉSENTATION LYRICS PROPRE
         output += "**RÉSULTAT DE LA TRANSCRIPTION (Lyrics) :**\n"
-        # Formatage du texte pour l'affichage Markdown
         formatted_lyrics = processed_text.replace('\n', ' ').strip().replace('. ', '.\n\n>>> ').replace('? ', '?\n\n>>> ')
         if not formatted_lyrics.startswith('>>> '):
             formatted_lyrics = '>>> ' + formatted_lyrics
@@ -195,6 +198,7 @@ def transcribe_audio(model_name: str, audio_path: str):
         yield f"❌ Erreur critique lors du chargement : {str(e)}"
     except Exception as e:
         yield f"❌ Erreur générale lors de la transcription complète : {e}"
     finally:

 # ----------------------------------------------------------------------
 # CONSTANTES DE CONFIGURATION
 # ----------------------------------------------------------------------
+# Liste de modèles mise à jour et vérifiée (conforme à RobotsMali sur HF)
 ROBOTSMALI_MODELS = [
     "RobotsMali/soloba-ctc-0.6b-v0",
     "RobotsMali/soloni-114m-tdt-ctc-v1",
     "RobotsMali/soloni-114m-tdt-ctc-V0",
+    "RobotsMali/stt-bm-quartznet5x5-V0",
     "RobotsMali/stt-bm-quartznet5x5-v1",
     "RobotsMali/soloba-ctc-0.6b-v1"
 ]
 # ----------------------------------------------------------------------
 def transcribe_audio(model_name: str, audio_path: str):
     """
+    Effectue la transcription ASR de l'audio complet avec une barre de progression stylée.
     """
+    # CORRECTION DE L'ERREUR GRADIO : Initialisation correcte de gr.Progress
+    progress = gr.Progress()
+    progress(0, desc="Démarrage du traitement")
     if audio_path is None:
         yield "⚠️ Veuillez d'abord télécharger ou enregistrer un fichier audio."
         return
         full_audio_data, sr = librosa.load(audio_path, sr=SR_TARGET, mono=True)
         total_duration = len(full_audio_data) / SR_TARGET
+        # Correction de la forme audio (squeeze)
         segment_data = full_audio_data.squeeze()
         sf.write(temp_full_path, segment_data, SR_TARGET)
         yield f"**[3/4] TRANSCRIPTION EN COURS...** Démarrage de l'inférence. ⏳"
         # --- BARRE DE PROGRESSION SIMULÉE ---
         for progress_percent in range(0, 91, 10):
             time.sleep(0.3)
+            # Utilisation de la syntaxe correcte : progress(valeur_flottante, description)
+            progress(progress_percent / 100, desc=f"Progression ASR ({progress_percent}%)")
         yield f"**[3/4] FINALISATION...** Inférence en cours sur le GPU. 🚀"
         # ---------------------------------------------
         if transcriptions and transcriptions[0]:
             hyp_object = transcriptions[0]
             if hasattr(hyp_object, 'text'):
                 transcription_text_final = hyp_object.text.strip()
             elif isinstance(hyp_object, str):
         punct_model = load_punct_model()
         if punct_model and transcription_text_final != "[Transcription vide ou échec ASR]":
             yield f"**[4/4] POST-TRAITEMENT...** Correction de la ponctuation et de la casse pour la lisibilité. ✨"
+            # Termine la barre de progression
+            progress(1.0, desc="Progression ASR (100%)")
             try:
                 corrected_list = punct_model.add_punctuation_capitalization([transcription_text_final])
         # 2. PRÉSENTATION LYRICS PROPRE
         output += "**RÉSULTAT DE LA TRANSCRIPTION (Lyrics) :**\n"
         formatted_lyrics = processed_text.replace('\n', ' ').strip().replace('. ', '.\n\n>>> ').replace('? ', '?\n\n>>> ')
         if not formatted_lyrics.startswith('>>> '):
             formatted_lyrics = '>>> ' + formatted_lyrics
         yield f"❌ Erreur critique lors du chargement : {str(e)}"
     except Exception as e:
+        # L'erreur de progression étant corrigée, cette ligne gère les autres erreurs
         yield f"❌ Erreur générale lors de la transcription complète : {e}"
     finally: