Spaces: Running on CPU Upgrade
| import os | |
| from pathlib import Path | |
| from httpx import AsyncClient | |
| import gradio as gr | |
| import numpy as np | |
| from dotenv import load_dotenv | |
| from fastrtc import ( | |
| AdditionalOutputs, | |
| ReplyOnPause, | |
| Stream, | |
| audio_to_bytes, | |
| get_turn_credentials_async, | |
| get_turn_credentials, | |
| ) | |
| from gradio.utils import get_space | |
| from languages import LANGUAGES | |
# Directory containing this script (used for resolving bundled assets).
# NOTE(review): not referenced anywhere in this file — confirm it is needed.
cur_dir = Path(__file__).parent

# Pull HF_TOKEN (and any other secrets) from a local .env file into os.environ.
load_dotenv()

# Single shared async HTTP client for all transcription requests.
# 30 s timeout covers the inference-endpoint round trip.
client = AsyncClient(timeout=30)
async def transcribe_file(audio: tuple[int, np.ndarray], language: str) -> str:
    """Transcribe one audio clip via the hosted Whisper inference endpoint.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by Gradio/FastRTC.
        language: ISO language code forwarded to Whisper.

    Returns:
        The plain-text transcript returned by the endpoint.

    Raises:
        httpx.HTTPStatusError: if the endpoint responds with a non-2xx status.
    """
    response = await client.post(
        url="https://cw18rfhfqf3db1m8.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data={"response_format": "text", "language": language},
    )
    # Fail loudly on HTTP errors; previously the error body was silently
    # returned and appended to the user's transcript as if it were speech.
    response.raise_for_status()
    return response.text
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    """Streaming handler: transcribe a paused utterance and append it.

    Args:
        audio: ``(sample_rate, samples)`` chunk buffered by ReplyOnPause.
        transcript: the transcript accumulated so far (from the Textbox).
        language: ISO language code forwarded to Whisper.

    Yields:
        AdditionalOutputs carrying the updated transcript string.
    """
    text = await transcribe_file(audio, language)
    # Only insert a separator when there is existing text; previously the very
    # first utterance was emitted with a spurious leading space.
    updated = f"{transcript} {text}" if transcript else text
    yield AdditionalOutputs(updated)
# Shared textbox: passed both as an additional input (current transcript) and
# as the additional output, so each recognized chunk is appended to it.
transcript = gr.Textbox(label="Transcript")

# Real-time WebRTC stream: ReplyOnPause buffers microphone audio and invokes
# `transcribe` each time the speaker pauses.
stream = Stream(
    # NOTE(review): 48_100 Hz is a non-standard sample rate — looks like a
    # typo for 48_000 (WebRTC's native rate); confirm against fastrtc docs.
    ReplyOnPause(transcribe, input_sample_rate=48_100),
    modality="audio",
    mode="send",  # audio flows client -> server only; text comes back via AdditionalOutputs
    additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")],
    additional_outputs=[transcript],
    # Keep the freshly yielded transcript (b); discard the stale input value (a).
    additional_outputs_handler=lambda a, b: b,
    rtc_configuration=get_turn_credentials_async,  # per-client TURN credentials
    server_rtc_configuration=get_turn_credentials(ttl=604_800),  # 7-day credential TTL
    concurrency_limit=20 if get_space() else None,  # cap concurrency only on Spaces
    time_limit=300,  # max seconds per streaming session
    ui_args={"title": ""},
)
# One-shot transcription UI: upload (or record) a clip, pick a language,
# get the full transcript back in a single request.
audio_input = gr.Audio(label="Upload Audio", sources=["upload", "microphone"])
language_input = gr.Dropdown(choices=LANGUAGES, label="Language")
iface = gr.Interface(
    fn=transcribe_file,
    inputs=[audio_input, language_input],
    outputs=gr.Textbox(label="Transcript"),
)
# Top-level page layout: header banners plus two tabs, one hosting the
# real-time FastRTC stream UI and one hosting the file-upload Interface.
with gr.Blocks() as demo:
    gr.HTML(
        """
    <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
    <img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
    </h1>
    """
    )
    gr.HTML(
        """
    <h2 style='text-align: center'>
    Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
    </h2>
    """
    )
    with gr.Tabs():
        with gr.Tab("Streaming"):
            gr.Markdown(
                "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."
            )
            # Render the auto-generated FastRTC streaming UI inside this tab.
            stream.ui.render()
        with gr.Tab("File Upload"):
            # Render the one-shot upload Interface inside the second tab.
            iface.render()
if __name__ == "__main__":
    # allowed_paths lets Gradio serve the header image referenced in the HTML
    # banner via /gradio_api/file=AV_Huggy.png.
    demo.launch(allowed_paths=["AV_Huggy.png"])