palli23 committed on
Commit
3407dd3
·
1 Parent(s): 90a78b4

diarization1Mæló

Browse files
Files changed (1) hide show
  1. app.py +41 -26
app.py CHANGED
@@ -1,12 +1,24 @@
1
- # app.py Whisper-small + Mælendagreining (pyannote 3.1) – VIRKAR Á ZeroGPU
2
  import os
3
  import gradio as gr
4
  import spaces
5
- from transformers import pipeline
6
- from pyannote.audio import Pipeline
7
- import torch
8
  import tempfile
 
 
9
  from torch.serialization import safe_globals
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
12
 
@@ -15,49 +27,52 @@ def transcribe_with_diarization(audio_path):
15
  if not audio_path:
16
  return "Hladdu upp hljóðskrá"
17
 
18
- # FIX: PyTorch 2.6+ unpickling villu (ZeroGPU krefst þess)
19
  with safe_globals([
20
  torch.torch_version.TorchVersion,
21
- 'pyannote.audio.core.task.Specifications'
 
 
 
 
22
  ]):
23
  diarization = Pipeline.from_pretrained(
24
  "pyannote/speaker-diarization-3.1",
25
- token=os.getenv("HF_TOKEN")
26
  ).to("cuda")
27
 
28
- # Keyra mælendagreiningu
29
  dia = diarization(audio_path)
30
-
31
- # Whisper-small
32
  asr = pipeline(
33
  "automatic-speech-recognition",
34
  model=MODEL_NAME,
35
  device=0,
36
- token=os.getenv("HF_TOKEN")
37
  )
38
-
 
39
  result = []
40
  for turn, _, speaker in dia.itertracks(yield_label=True):
41
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
42
- dia.crop(audio_path, turn).export(f.name, format="wav")
43
- segment_path = f.name
44
-
45
- text = asr(segment_path)["text"].strip()
 
46
  result.append(f"[MÆLENDI {speaker}] {text}")
47
- os.unlink(segment_path)
48
-
49
- return "\n".join(result) or "Ekkert heyrt"
50
 
51
- # Gradio interface
 
52
  with gr.Blocks() as demo:
53
  gr.Markdown("# Íslenskt ASR + Mælendagreining")
54
- gr.Markdown("**Whisper-small + pyannote 3.1 · Full podcast-transcript**")
55
- gr.Markdown("Hladdu upp .mp3 / .wav (allt að 5 mín)")
56
 
57
  audio = gr.Audio(type="filepath", label="Hljóðskrá")
58
- btn = gr.Button("Transcribe með mælendum", variant="primary", size="lg")
59
- out = gr.Textbox(lines=35, label="Útskrift með mælendum")
60
-
61
  btn.click(transcribe_with_diarization, inputs=audio, outputs=out)
62
 
63
- demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py for HF Spaces (ZeroGPU safe pyannote)
2
  import os
3
  import gradio as gr
4
  import spaces
 
 
 
5
  import tempfile
6
+ import torch
7
+
8
  from torch.serialization import safe_globals
9
+ from pyannote.audio.core.model import Model
10
+ from pyannote.audio.core.task import Task, Specifications
11
+ from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization
12
+ from typing import OrderedDict
13
+
14
+ from transformers import pipeline
15
+ from pyannote.audio import Pipeline
16
+
17
+ # Required patches for ZeroGPU
18
+ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
19
+ torch.serialization.add_safe_globals({
20
+ "OrderedDict": OrderedDict,
21
+ })
22
 
23
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
24
 
 
def transcribe_with_diarization(audio_path):
    """Transcribe an audio file and label every segment with its speaker.

    Runs the pyannote speaker-diarization pipeline on the file, then sends
    each speaker turn through the Whisper ASR pipeline.

    NOTE(review): the diff this block was recovered from hides the two lines
    directly above the function body. If the deployed file decorates this
    function (for example to request a ZeroGPU device), keep that decorator.

    Args:
        audio_path: Path to the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        One ``[MÆLENDI <speaker>] <text>`` line per speaker turn, joined with
        newlines, or a short Icelandic message when there is no input or no
        speaker turn was found.

    Raises:
        RuntimeError: If the diarization pipeline could not be loaded.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Local imports keep this fix self-contained; pyannote.audio is already a
    # dependency of this file.
    import collections

    from pyannote.audio import Audio

    hf_token = os.getenv("HF_TOKEN")

    # Fix strict unpickling in torch 2.6 (ZeroGPU). The allow-list needs the
    # real collections.OrderedDict class, not the typing alias.
    with safe_globals([
        torch.torch_version.TorchVersion,
        Model,
        Task,
        Specifications,
        SpeakerDiarization,
        collections.OrderedDict,
    ]):
        diarization = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=hf_token,
        )

    # from_pretrained() can hand back None instead of raising when the gated
    # model cannot be downloaded; fail with a readable error in that case.
    if diarization is None:
        raise RuntimeError(
            "Could not load pyannote/speaker-diarization-3.1; "
            "check HF_TOKEN and the model's access conditions."
        )
    # pyannote 3.1 requires a torch.device here, not a plain string.
    diarization.to(torch.device("cuda"))

    # Run diarization
    dia = diarization(audio_path)

    # Whisper model. `token` replaces the deprecated `use_auth_token` keyword
    # in transformers.
    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=hf_token,
    )

    # Segment-by-segment ASR. Each speaker turn is cut out in memory, so no
    # temporary file is created and nothing can be left behind if ASR raises.
    # The diarization pipeline and its annotation have no crop()/export()
    # audio API. 16 kHz mono is the input format Whisper checkpoints expect.
    audio = Audio(sample_rate=16000, mono="downmix")
    result = []
    for turn, _, speaker in dia.itertracks(yield_label=True):
        # mode="pad" tolerates a turn that ends slightly past the file end.
        waveform, sample_rate = audio.crop(audio_path, turn, mode="pad")
        chunk = {
            "raw": waveform.squeeze(0).numpy(),
            "sampling_rate": sample_rate,
        }
        text = asr(chunk)["text"].strip()
        result.append(f"[MÆLENDI {speaker}] {text}")

    return "\n".join(result) or "Enginn texti heyrðist."
67
+
# Login credentials for the Gradio app. They can be overridden through the
# environment (e.g. Space secrets) so real credentials need not live in the
# source; the defaults keep the previous behaviour.
# NOTE(review): APP_USERNAME / APP_PASSWORD are new variable names — set them
# in the deployment, then drop the hard-coded defaults.
_auth_user = os.getenv("APP_USERNAME", "beta")
_auth_password = os.getenv("APP_PASSWORD", "beta2025")

# UI: one audio upload, one button, one text box for the labelled transcript.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR + Mælendagreining")
    gr.Markdown("Whisper-small + pyannote 3.1 (ZeroGPU örugg útgáfa)")

    audio = gr.Audio(type="filepath", label="Hljóðskrá")
    btn = gr.Button("Transcribe með mælendum")
    out = gr.Textbox(lines=35, label="Úttak")

    # The uploaded file path goes in, the speaker-labelled transcript comes out.
    btn.click(transcribe_with_diarization, inputs=audio, outputs=out)

demo.launch(auth=(_auth_user, _auth_password))