palli23 committed on
Commit
7fa0abc
·
1 Parent(s): d817784

diarization1Mæló

Browse files
Files changed (1) hide show
  1. app.py +25 -50
app.py CHANGED
@@ -1,25 +1,19 @@
1
  # ============================================================
2
- # app.py – Whisper-small + Pyannote 3.1 (ZeroGPU örugg útgáfa)
3
  # ============================================================
4
 
5
  import os
6
  import gradio as gr
7
  import spaces
8
  import tempfile
9
- import torch
10
  from transformers import pipeline
11
  from pyannote.audio import Pipeline
12
- from torch.serialization import safe_globals
13
 
14
- # ------------------------------------------------------------
15
- # STILLT MODELNÖFN
16
- # ------------------------------------------------------------
17
  ASR_MODEL = "palli23/whisper-small-sam_spjall"
18
- DIAR_MODEL = "pyannote/speaker-diarization-3.1"
 
19
 
20
- # ------------------------------------------------------------
21
- # Aðalfallið – keyrir á ZeroGPU (120s GPU max)
22
- # ------------------------------------------------------------
23
  @spaces.GPU(duration=120)
24
  def transcribe_with_diarization(audio_path):
25
 
@@ -27,73 +21,54 @@ def transcribe_with_diarization(audio_path):
27
  return "Hladdu upp hljóðskrá."
28
 
29
  # ----------------------------
30
- # 1. PYTORCH SAFE GLOBALS FIX
31
  # ----------------------------
32
- # PyTorch 2.6+ ZeroGPU unpickling patch – MANDATORY
33
- with safe_globals([
34
- torch.torch_version.TorchVersion,
35
- "pyannote.audio.core.task.Specifications",
36
- "pyannote.audio.core.model.Model",
37
- "pyannote.audio.pipelines.speaker_diarization.SpeakerDiarization"
38
- ]):
39
-
40
- # ----------------------------
41
- # 2. Load diarization pipeline
42
- # ----------------------------
43
- diarization = Pipeline.from_pretrained(
44
- DIAR_MODEL,
45
- token=os.getenv("HF_TOKEN") # <--- RÉTT FYRIR PYANNOTE 3.1
46
- ).to("cuda")
47
-
48
- # Keyra diarization
49
  diar = diarization(audio_path)
50
 
51
  # ----------------------------
52
- # 3. Whisper ASR
53
  # ----------------------------
54
  asr = pipeline(
55
  task="automatic-speech-recognition",
56
  model=ASR_MODEL,
57
- device=0,
58
- token=os.getenv("HF_TOKEN")
59
  )
60
 
61
  # ----------------------------
62
- # 4. Skera út segment + Greina texta
63
  # ----------------------------
64
- final_output = []
65
 
66
  for turn, _, speaker in diar.itertracks(yield_label=True):
67
 
68
- # Vista tímabundna WAV fyrir hvert segment
69
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
70
  diar.crop(audio_path, turn).export(tmp.name, format="wav")
71
- seg_path = tmp.name
72
-
73
- # ASR texti
74
- text = asr(seg_path)["text"].strip()
75
 
76
- # Vista niðurstöðu
77
- final_output.append(f"[MÆLENDI {speaker}] {text}")
78
 
79
- # Hreinsa
80
- os.unlink(seg_path)
81
 
82
- return "\n".join(final_output) if final_output else "Ekkert heyrt í hljóðinu."
83
 
84
 
85
  # ------------------------------------------------------------
86
  # GRADIO UI
87
  # ------------------------------------------------------------
88
  with gr.Blocks() as demo:
89
- gr.Markdown("# 🎙️ Íslenskt tal → texti + mælendagreining")
90
- gr.Markdown("Whisper-small + pyannote 3.1 • Virkar á ZeroGPU 5 mín hljóð max")
91
 
92
- audio_input = gr.Audio(type="filepath", label="Hladdu upp hljóðskrá (.wav / .mp3)")
93
- out_box = gr.Textbox(lines=30, label="Útskrift + mælendur")
94
 
95
- run_button = gr.Button("Transcribe með mælendum", variant="primary")
96
- run_button.click(transcribe_with_diarization, inputs=audio_input, outputs=out_box)
97
 
98
- # Spaces auth
99
  demo.launch(auth=("beta", "beta2025"))
 
1
  # ============================================================
2
+ # app.py – Whisper-small + Pyannote 2.1.1 (ZeroGPU örugg útgáfa)
3
  # ============================================================
4
 
5
  import os
6
  import gradio as gr
7
  import spaces
8
  import tempfile
 
9
  from transformers import pipeline
10
  from pyannote.audio import Pipeline
 
11
 
12
+
 
 
13
  ASR_MODEL = "palli23/whisper-small-sam_spjall"
14
+ DIAR_MODEL = "pyannote/speaker-diarization" # <--- ATH: ekki 3.1
15
+
16
 
 
 
 
17
  @spaces.GPU(duration=120)
18
  def transcribe_with_diarization(audio_path):
19
 
 
21
  return "Hladdu upp hljóðskrá."
22
 
23
  # ----------------------------
24
+ # 1. Load diarization pipeline
25
  # ----------------------------
26
+ diarization = Pipeline.from_pretrained(
27
+ DIAR_MODEL,
28
+ use_auth_token=os.getenv("HF_TOKEN") # pyannote 2.x notar þetta
29
+ ).to("cuda")
30
+
 
 
 
 
 
 
 
 
 
 
 
 
31
  diar = diarization(audio_path)
32
 
33
  # ----------------------------
34
+ # 2. Whisper ASR
35
  # ----------------------------
36
  asr = pipeline(
37
  task="automatic-speech-recognition",
38
  model=ASR_MODEL,
39
+ device=0
 
40
  )
41
 
42
  # ----------------------------
43
+ # 3. Skera út segment + ASR
44
  # ----------------------------
45
+ output_lines = []
46
 
47
  for turn, _, speaker in diar.itertracks(yield_label=True):
48
 
 
49
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
50
  diar.crop(audio_path, turn).export(tmp.name, format="wav")
51
+ seg_file = tmp.name
 
 
 
52
 
53
+ text = asr(seg_file)["text"].strip()
54
+ output_lines.append(f"[MÆLENDI {speaker}] {text}")
55
 
56
+ os.unlink(seg_file)
 
57
 
58
+ return "\n".join(output_lines) or "Enginn texti fannst."
59
 
60
 
61
  # ------------------------------------------------------------
62
  # GRADIO UI
63
  # ------------------------------------------------------------
64
  with gr.Blocks() as demo:
65
+ gr.Markdown("# 🎙️ Íslenskt ASR + mælendagreining")
66
+ gr.Markdown("Whisper-small + pyannote 2.1.1 (ZeroGPU örugg útgáfa)")
67
 
68
+ audio = gr.Audio(type="filepath", label="Hlaða inn hljóði (.wav or .mp3)")
69
+ out = gr.Textbox(lines=30, label="Útskrift með mælendum")
70
 
71
+ btn = gr.Button("Transcribe með mælendum", variant="primary")
72
+ btn.click(transcribe_with_diarization, inputs=audio, outputs=out)
73
 
 
74
  demo.launch(auth=("beta", "beta2025"))