Spaces: Running on CPU Upgrade
| import os | |
| from pathlib import Path | |
| from httpx import AsyncClient | |
| import gradio as gr | |
| import numpy as np | |
| from dotenv import load_dotenv | |
| from fastrtc import ( | |
| AdditionalOutputs, | |
| ReplyOnPause, | |
| Stream, | |
| audio_to_bytes, | |
| get_turn_credentials_async, | |
| get_turn_credentials, | |
| ) | |
| from gradio.utils import get_space | |
| from languages import LANGUAGES | |
# Directory containing this script (used for resolving bundled assets).
# NOTE(review): not referenced anywhere in this file — confirm it is needed.
cur_dir = Path(__file__).parent

# Pull HF_TOKEN (and any other secrets) from a local .env file into os.environ.
load_dotenv()

# Single shared async HTTP client for all transcription requests.
# 30 s timeout covers the inference-endpoint round trip.
client = AsyncClient(timeout=30)
async def transcribe_file(audio: tuple[int, np.ndarray], language: str) -> str:
    """Transcribe one audio clip via the hosted Whisper inference endpoint.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by Gradio/FastRTC.
        language: ISO language code forwarded to Whisper.

    Returns:
        The plain-text transcript returned by the endpoint.

    Raises:
        httpx.HTTPStatusError: if the endpoint responds with a non-2xx status.
    """
    response = await client.post(
        url="https://cw18rfhfqf3db1m8.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data={"response_format": "text", "language": language},
    )
    # Fail loudly on HTTP errors; previously the error body was silently
    # returned and appended to the user's transcript as if it were speech.
    response.raise_for_status()
    return response.text
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    """Streaming handler: transcribe a paused utterance and append it.

    Args:
        audio: ``(sample_rate, samples)`` chunk buffered by ReplyOnPause.
        transcript: the transcript accumulated so far (from the Textbox).
        language: ISO language code forwarded to Whisper.

    Yields:
        AdditionalOutputs carrying the updated transcript string.
    """
    text = await transcribe_file(audio, language)
    # Only insert a separator when there is existing text; previously the very
    # first utterance was emitted with a spurious leading space.
    updated = f"{transcript} {text}" if transcript else text
    yield AdditionalOutputs(updated)
# Shared textbox: passed both as an additional input (current transcript) and
# as the additional output, so each recognized chunk is appended to it.
transcript = gr.Textbox(label="Transcript")

# Real-time WebRTC stream: ReplyOnPause buffers microphone audio and invokes
# `transcribe` each time the speaker pauses.
stream = Stream(
    # NOTE(review): 48_100 Hz is a non-standard sample rate — looks like a
    # typo for 48_000 (WebRTC's native rate); confirm against fastrtc docs.
    ReplyOnPause(transcribe, input_sample_rate=48_100),
    modality="audio",
    mode="send",  # audio flows client -> server only; text comes back via AdditionalOutputs
    additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")],
    additional_outputs=[transcript],
    # Keep the freshly yielded transcript (b); discard the stale input value (a).
    additional_outputs_handler=lambda a, b: b,
    rtc_configuration=get_turn_credentials_async,  # per-client TURN credentials
    server_rtc_configuration=get_turn_credentials(ttl=604_800),  # 7-day credential TTL
    concurrency_limit=20 if get_space() else None,  # cap concurrency only on Spaces
    time_limit=300,  # max seconds per streaming session
    ui_args={"title": ""},
)
# One-shot transcription UI: upload (or record) a clip, pick a language,
# get the full transcript back in a single request.
audio_input = gr.Audio(label="Upload Audio", sources=["upload", "microphone"])
language_input = gr.Dropdown(choices=LANGUAGES, label="Language")
iface = gr.Interface(
    fn=transcribe_file,
    inputs=[audio_input, language_input],
    outputs=gr.Textbox(label="Transcript"),
)
# Top-level page layout: header banners plus two tabs, one hosting the
# real-time FastRTC stream UI and one hosting the file-upload Interface.
with gr.Blocks() as demo:
    gr.HTML(
        """
    <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
    <img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
    </h1>
    """
    )
    gr.HTML(
        """
    <h2 style='text-align: center'>
    Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
    </h2>
    """
    )
    with gr.Tabs():
        with gr.Tab("Streaming"):
            gr.Markdown(
                "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."
            )
            # Render the auto-generated FastRTC streaming UI inside this tab.
            stream.ui.render()
        with gr.Tab("File Upload"):
            # Render the one-shot upload Interface inside the second tab.
            iface.render()
if __name__ == "__main__":
    # allowed_paths lets Gradio serve the header image referenced in the HTML
    # banner via /gradio_api/file=AV_Huggy.png.
    demo.launch(allowed_paths=["AV_Huggy.png"])