Sidak Singh committed on
Commit
66a7fab
·
1 Parent(s): 7b7174c

can detect sentence ends

Browse files
__pycache__/transcriber.cpython-310.pyc CHANGED
Binary files a/__pycache__/transcriber.cpython-310.pyc and b/__pycache__/transcriber.cpython-310.pyc differ
 
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import numpy as np
3
  from transcriber import AudioProcessor
4
 
 
5
  # Create processor instance with more conservative settings
6
  processor = AudioProcessor(model_size="tiny.en", device="cpu")
7
 
@@ -22,6 +23,8 @@ def process_mic_audio(audio):
22
  # Get current transcription
23
  transcription = processor.get_transcription()
24
  print(transcription)
 
 
25
 
26
  # Return status update and transcription
27
  buffer_seconds = buffer_size / processor.sample_rate
 
2
  import numpy as np
3
  from transcriber import AudioProcessor
4
 
5
+
6
  # Create processor instance with more conservative settings
7
  processor = AudioProcessor(model_size="tiny.en", device="cpu")
8
 
 
23
  # Get current transcription
24
  transcription = processor.get_transcription()
25
  print(transcription)
26
+ transcription = str(transcription)
27
+
28
 
29
  # Return status update and transcription
30
  buffer_seconds = buffer_size / processor.sample_rate
transcriber.py CHANGED
@@ -3,6 +3,8 @@ import threading
3
  import time
4
  from faster_whisper import WhisperModel
5
  import scipy.signal as signal
 
 
6
 
7
  class AudioProcessor:
8
  def __init__(self, model_size="tiny.en", device="cpu", compute_type="int8"):
@@ -26,6 +28,8 @@ class AudioProcessor:
26
  self.audio_model = WhisperModel(model_size, device=device, compute_type=compute_type)
27
  print(f"Initialized {model_size} model on {device}")
28
 
 
 
29
  def _trim_buffer_intelligently(self):
30
  """
31
  Trim the buffer while preserving transcription continuity
@@ -273,7 +277,8 @@ class AudioProcessor:
273
  def get_transcription(self):
274
  """Get the current transcription text"""
275
  with self.lock:
276
- return self.full_transcription
 
277
 
278
  def get_playback_audio(self):
279
  """Get properly formatted audio for Gradio playback"""
 
3
  import time
4
  from faster_whisper import WhisperModel
5
  import scipy.signal as signal
6
+ from typing import List
7
+ from punctuators.models import SBDModelONNX
8
 
9
  class AudioProcessor:
10
  def __init__(self, model_size="tiny.en", device="cpu", compute_type="int8"):
 
28
  self.audio_model = WhisperModel(model_size, device=device, compute_type=compute_type)
29
  print(f"Initialized {model_size} model on {device}")
30
 
31
+ self.sentence_end_detect = SBDModelONNX.from_pretrained("sbd_multi_lang")
32
+
33
  def _trim_buffer_intelligently(self):
34
  """
35
  Trim the buffer while preserving transcription continuity
 
277
  def get_transcription(self):
278
  """Get the current transcription text"""
279
  with self.lock:
280
+ results: List[List[str]] = self.sentence_end_detect.infer([self.full_transcription])
281
+ return results[0]
282
 
283
  def get_playback_audio(self):
284
  """Get properly formatted audio for Gradio playback"""
working.py DELETED
@@ -1,28 +0,0 @@
1
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
2
-
3
- def transcribe(stream, new_chunk):
4
- sr, y = new_chunk
5
-
6
- # Convert to mono if stereo
7
- if y.ndim > 1:
8
- y = y.mean(axis=1)
9
-
10
- y = y.astype(np.float32)
11
- y /= np.max(np.abs(y))
12
-
13
- if stream is not None:
14
- stream = np.concatenate([stream, y])
15
- else:
16
- stream = y
17
-
18
- # Return the stream as state and a string representation of the array for display
19
- return stream, str(stream)
20
-
21
- demo = gr.Interface(
22
- transcribe,
23
- ["state", gr.Audio(sources=["microphone"], streaming=True)],
24
- ["state", "text"],
25
- live=True,
26
- )
27
-
28
- demo.launch()