Spaces:
Running
Running
Irpan
committed on
Commit
·
e2dd467
1
Parent(s):
8bef169
asr
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ mms_transcribe = gr.Interface(
|
|
| 18 |
gr.Textbox(label="Uyghur Arabic Transcription"),
|
| 19 |
gr.Textbox(label="Uyghur Latin Transcription"),
|
| 20 |
],
|
| 21 |
-
|
| 22 |
title="Speech-to-text",
|
| 23 |
description=(
|
| 24 |
"Transcribe Uyghur speech audio from a microphone or input file."
|
|
@@ -40,7 +40,7 @@ mms_synthesize = gr.Interface(
|
|
| 40 |
outputs=[
|
| 41 |
gr.Audio(label="Generated Audio"),
|
| 42 |
],
|
| 43 |
-
|
| 44 |
title="Text-to-speech",
|
| 45 |
description=(
|
| 46 |
"Generate audio from input Uyghur text."
|
|
|
|
| 18 |
gr.Textbox(label="Uyghur Arabic Transcription"),
|
| 19 |
gr.Textbox(label="Uyghur Latin Transcription"),
|
| 20 |
],
|
| 21 |
+
examples=util.asr_examples,
|
| 22 |
title="Speech-to-text",
|
| 23 |
description=(
|
| 24 |
"Transcribe Uyghur speech audio from a microphone or input file."
|
|
|
|
| 40 |
outputs=[
|
| 41 |
gr.Audio(label="Generated Audio"),
|
| 42 |
],
|
| 43 |
+
examples=util.tts_examples,
|
| 44 |
title="Text-to-speech",
|
| 45 |
description=(
|
| 46 |
"Generate audio from input Uyghur text."
|
tts.py
CHANGED
|
@@ -53,15 +53,17 @@ def synthesize(text, model_id):
|
|
| 53 |
inputs = processor(text, return_tensors="pt").to(device)
|
| 54 |
|
| 55 |
with torch.no_grad():
|
| 56 |
-
output = model(**inputs).waveform.cpu() # Move output back to CPU for saving
|
| 57 |
|
| 58 |
output_path = "tts_output.wav"
|
| 59 |
sample_rate = model.config.sampling_rate
|
| 60 |
-
scipy.io.wavfile.write(output_path, rate=sample_rate, data=output
|
| 61 |
|
| 62 |
return output_path
|
| 63 |
|
| 64 |
def synthesize_turkic_tts(text):
|
|
|
|
|
|
|
| 65 |
text = normalization(text, 'uyghur')
|
| 66 |
|
| 67 |
with torch.no_grad():
|
|
|
|
| 53 |
inputs = processor(text, return_tensors="pt").to(device)
|
| 54 |
|
| 55 |
with torch.no_grad():
|
| 56 |
+
output = model(**inputs).waveform.cpu().numpy()[0] # Move output back to CPU for saving
|
| 57 |
|
| 58 |
output_path = "tts_output.wav"
|
| 59 |
sample_rate = model.config.sampling_rate
|
| 60 |
+
scipy.io.wavfile.write(output_path, rate=sample_rate, data=output)
|
| 61 |
|
| 62 |
return output_path
|
| 63 |
|
| 64 |
def synthesize_turkic_tts(text):
|
| 65 |
+
text = util.ug_arab_to_latn(text)
|
| 66 |
+
|
| 67 |
text = normalization(text, 'uyghur')
|
| 68 |
|
| 69 |
with torch.no_grad():
|