ASR_API2

Sleeping

App Files Files Community

palli23 committed on Nov 30, 2025

Commit

7fa0abc

1 Parent(s): d817784

diarization1Mæló

Browse files

Files changed (1) hide show

app.py +25 -50

app.py CHANGED Viewed

@@ -1,25 +1,19 @@
 # ============================================================
-# app.py – Whisper-small + Pyannote 3.1 (ZeroGPU örugg útgáfa)
 # ============================================================
 import os
 import gradio as gr
 import spaces
 import tempfile
-import torch
 from transformers import pipeline
 from pyannote.audio import Pipeline
-from torch.serialization import safe_globals
-# ------------------------------------------------------------
-# STILLT MODELNÖFN
-# ------------------------------------------------------------
 ASR_MODEL = "palli23/whisper-small-sam_spjall"
-DIAR_MODEL = "pyannote/speaker-diarization-3.1"
-# ------------------------------------------------------------
-# Aðalfallið – keyrir á ZeroGPU (120s GPU max)
-# ------------------------------------------------------------
 @spaces.GPU(duration=120)
 def transcribe_with_diarization(audio_path):
@@ -27,73 +21,54 @@ def transcribe_with_diarization(audio_path):
         return "Hladdu upp hljóðskrá."
     # ----------------------------
-    # 1. PYTORCH SAFE GLOBALS FIX
     # ----------------------------
-    # PyTorch 2.6+ ZeroGPU unpickling patch – MANDATORY
-    with safe_globals([
-        torch.torch_version.TorchVersion,
-        "pyannote.audio.core.task.Specifications",
-        "pyannote.audio.core.model.Model",
-        "pyannote.audio.pipelines.speaker_diarization.SpeakerDiarization"
-    ]):
-        # ----------------------------
-        # 2. Load diarization pipeline
-        # ----------------------------
-        diarization = Pipeline.from_pretrained(
-            DIAR_MODEL,
-            token=os.getenv("HF_TOKEN")   # <--- RÉTT FYRIR PYANNOTE 3.1
-        ).to("cuda")
-    # Keyra diarization
     diar = diarization(audio_path)
     # ----------------------------
-    # 3. Whisper ASR
     # ----------------------------
     asr = pipeline(
         task="automatic-speech-recognition",
         model=ASR_MODEL,
-        device=0,
-        token=os.getenv("HF_TOKEN")
     )
     # ----------------------------
-    # 4. Skera út segment + Greina texta
     # ----------------------------
-    final_output = []
     for turn, _, speaker in diar.itertracks(yield_label=True):
-        # Vista tímabundna WAV fyrir hvert segment
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
             diar.crop(audio_path, turn).export(tmp.name, format="wav")
-            seg_path = tmp.name
-        # ASR texti
-        text = asr(seg_path)["text"].strip()
-        # Vista niðurstöðu
-        final_output.append(f"[MÆLENDI {speaker}] {text}")
-        # Hreinsa
-        os.unlink(seg_path)
-    return "\n".join(final_output) if final_output else "Ekkert heyrt í hljóðinu."
 # ------------------------------------------------------------
 # GRADIO UI
 # ------------------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎙️ Íslenskt tal → texti + mælendagreining")
-    gr.Markdown("Whisper-small + pyannote 3.1 • Virkar á ZeroGPU • 5 mín hljóð max")
-    audio_input = gr.Audio(type="filepath", label="Hladdu upp hljóðskrá (.wav / .mp3)")
-    out_box = gr.Textbox(lines=30, label="Útskrift + mælendur")
-    run_button = gr.Button("Transcribe með mælendum", variant="primary")
-    run_button.click(transcribe_with_diarization, inputs=audio_input, outputs=out_box)
-# Spaces auth
 demo.launch(auth=("beta", "beta2025"))

 # ============================================================
+# app.py – Whisper-small + Pyannote 2.1.1 (ZeroGPU örugg útgáfa)
 # ============================================================
 import os
 import gradio as gr
 import spaces
 import tempfile
 from transformers import pipeline
 from pyannote.audio import Pipeline
 ASR_MODEL = "palli23/whisper-small-sam_spjall"
+DIAR_MODEL = "pyannote/speaker-diarization"   # <--- ATH: ekki 3.1
 @spaces.GPU(duration=120)
 def transcribe_with_diarization(audio_path):
         return "Hladdu upp hljóðskrá."
     # ----------------------------
+    # 1. Load diarization pipeline
     # ----------------------------
+    diarization = Pipeline.from_pretrained(
+        DIAR_MODEL,
+        use_auth_token=os.getenv("HF_TOKEN")  # pyannote 2.x notar þetta
+    ).to("cuda")
     diar = diarization(audio_path)
     # ----------------------------
+    # 2. Whisper ASR
     # ----------------------------
     asr = pipeline(
         task="automatic-speech-recognition",
         model=ASR_MODEL,
+        device=0
     )
     # ----------------------------
+    # 3. Skera út segment + ASR
     # ----------------------------
+    output_lines = []
     for turn, _, speaker in diar.itertracks(yield_label=True):
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
             diar.crop(audio_path, turn).export(tmp.name, format="wav")
+            seg_file = tmp.name
+        text = asr(seg_file)["text"].strip()
+        output_lines.append(f"[MÆLENDI {speaker}] {text}")
+        os.unlink(seg_file)
+    return "\n".join(output_lines) or "Enginn texti fannst."
 # ------------------------------------------------------------
 # GRADIO UI
 # ------------------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎙️ Íslenskt ASR + mælendagreining")
+    gr.Markdown("Whisper-small + pyannote 2.1.1 (ZeroGPU örugg útgáfa)")
+    audio = gr.Audio(type="filepath", label="Hlaða inn hljóði (.wav or .mp3)")
+    out = gr.Textbox(lines=30, label="Útskrift með mælendum")
+    btn = gr.Button("Transcribe með mælendum", variant="primary")
+    btn.click(transcribe_with_diarization, inputs=audio, outputs=out)
 demo.launch(auth=("beta", "beta2025"))