Spaces:

ixxan
/

uyghur-speech-models

Running

Irpan committed on Dec 23, 2024

Commit

e2dd467

1 Parent(s): 8bef169

asr

Files changed (2) hide show

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ mms_transcribe = gr.Interface(
         gr.Textbox(label="Uyghur Arabic Transcription"),
         gr.Textbox(label="Uyghur Latin Transcription"),
     ],
-    #examples=util.asr_examples,
     title="Speech-to-text",
     description=(
         "Transcribe Uyghur speech audio from a microphone or input file."
@@ -40,7 +40,7 @@ mms_synthesize = gr.Interface(
     outputs=[
         gr.Audio(label="Generated Audio"),
     ],
-    #examples=util.tts_examples,
     title="Text-to-speech",
     description=(
         "Generate audio from input Uyghur text."

         gr.Textbox(label="Uyghur Arabic Transcription"),
         gr.Textbox(label="Uyghur Latin Transcription"),
     ],
+    examples=util.asr_examples,
     title="Speech-to-text",
     description=(
         "Transcribe Uyghur speech audio from a microphone or input file."
     outputs=[
         gr.Audio(label="Generated Audio"),
     ],
+    examples=util.tts_examples,
     title="Text-to-speech",
     description=(
         "Generate audio from input Uyghur text."

tts.py CHANGED Viewed

@@ -53,15 +53,17 @@ def synthesize(text, model_id):
     inputs = processor(text, return_tensors="pt").to(device)
     with torch.no_grad():
-        output = model(**inputs).waveform.cpu()  # Move output back to CPU for saving
     output_path = "tts_output.wav"
     sample_rate = model.config.sampling_rate
-    scipy.io.wavfile.write(output_path, rate=sample_rate, data=output.numpy()[0])
     return output_path
 def synthesize_turkic_tts(text):
     text = normalization(text, 'uyghur')
     with torch.no_grad():

     inputs = processor(text, return_tensors="pt").to(device)
     with torch.no_grad():
+        output = model(**inputs).waveform.cpu().numpy()[0]  # Move output back to CPU for saving
     output_path = "tts_output.wav"
     sample_rate = model.config.sampling_rate
+    scipy.io.wavfile.write(output_path, rate=sample_rate, data=output)
     return output_path
 def synthesize_turkic_tts(text):
+    text = util.ug_arab_to_latn(text)
     text = normalization(text, 'uyghur')
     with torch.no_grad():