Spaces:

Photon08
/

speech_to_text

Runtime error

App Files Files Community

speech_to_text / app.py

Photon08

Update app.py

37e5baf almost 3 years ago

raw

history blame contribute delete

2.75 kB

	import torch
	import whisper
	import pytube
	import librosa
	import streamlit as st
	import numpy as np
	from fpdf import FPDF
	from reportlab.pdfgen.canvas import Canvas
	import time



	def predict(url=None, translation="No", tran_lang="en"):
	    """Transcribe (and optionally translate) the audio of a YouTube video.

	    Args:
	        url: Address of the YouTube video whose audio track is processed.
	        translation: ``"Yes"`` to run a second decoding pass with
	            ``tran_lang`` forced as the language; any other value skips it.
	        tran_lang: Whisper language code used for the second pass.

	    Returns:
	        A 3-tuple ``(detected_lang, transcription, trans)``:
	        ``detected_lang`` is the language code with the highest probability
	        reported by ``detect_language``, ``transcription`` is the text of the
	        default decoding pass, and ``trans`` is the text of the forced-language
	        pass or ``None`` when no translation was requested.

	    Raises:
	        ValueError: If ``url`` is empty or ``None``.
	    """
	    # Fail early with a clear message instead of an opaque downloader error.
	    if not url:
	        raise ValueError("A YouTube url is required")

	    model_m = whisper.load_model("tiny")

	    # Download the audio track once and reuse the file for every pass.
	    stream = pytube.YouTube(url).streams.get_audio_only()
	    audio_file = stream.download()

	    # Detect the language on the requested video itself; pad_or_trim cuts or
	    # pads the waveform to the fixed window the model expects.
	    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))
	    mel = whisper.log_mel_spectrogram(audio).to(model_m.device)
	    _, probs = model_m.detect_language(mel)
	    detected_lang = max(probs, key=probs.get)

	    transcription = model_m.transcribe(audio_file, fp16=False)["text"]

	    trans = None
	    if translation == "Yes":
	        # NOTE(review): this forces the decoding language rather than using
	        # Whisper's task="translate" mode -- confirm it yields the intended
	        # output for non-English targets.
	        trans = model_m.transcribe(
	            audio_file, language=tran_lang, fp16=False
	        )["text"]

	    # Always return the same shape so callers can unpack unconditionally.
	    return detected_lang, transcription, trans
	# Streamlit page: collect a YouTube url, run predict() and show the results.
	st.image(image="https://www.respeecher.com/hubfs/What-is-Text-to-Speech-TTS%29-Initial-Speech-Synthesis-Explained-Respeecher-voice-cloning-software.jpeg", output_format="JPEG")
	st.title("Speech to Text generator")

	st.write("This app uses an open source neural net called Whisper(developed by OpenAI)")

	url = st.text_input(label="Please enter the YouTube url: ")
	tran_req = st.selectbox(label="Do you want to translate the transcript?", options=("Yes", "No"))

	# Only ask for a target language when a translation was requested.
	if tran_req == "Yes":
	    lang = st.selectbox(label="Please select the required language: ", options=("en", "fr", "ja"))
	else:
	    lang = "en"

	if st.button("Generate"):
	    if not url.strip():
	        st.warning("Please enter a YouTube url first.")
	    else:
	        # st.spinner is a context manager: the message is only displayed
	        # while the body of the `with` block is running.
	        with st.spinner("Speech to Text engine running..."):
	            result = predict(url.strip(), translation=tran_req, tran_lang=lang)

	        # Accept both a 2- and a 3-element result so that a missing
	        # translation never breaks the unpacking.
	        lang_d, transcription, *extra = result
	        trans = extra[0] if extra else None

	        st.write("Detected language:", lang_d)
	        # TODO: offer the transcript as a downloadable PDF.
	        st.write(transcription)

	        # Show the translation section only when one was asked for.
	        if tran_req == "Yes" and trans is not None:
	            st.write("Translation: ")
	            st.write(trans)

	        st.success("Speech to text converted successfully!")