Spaces:

Photon08
/

speech_to_text

Runtime error

App Files Files Community

speech_to_text / app.py

Photon08

Update app.py

be6e7bb almost 3 years ago

raw

history blame

2.46 kB

	import torch
	import whisper
	import pytube
	import librosa
	import streamlit as st
	import numpy as np
	from fpdf import FPDF
	from reportlab.pdfgen.canvas import Canvas
	import time



	def predict(url=None, translation="No", tran_lang="en"):
	    """Transcribe the audio track of a YouTube video with Whisper.

	    The audio is downloaded once and reused for language detection,
	    transcription and, when requested, the translation pass.

	    Args:
	        url: Address of the YouTube video to process.
	        translation: "Yes" to also produce a translation pass; any other
	            value skips it.
	        tran_lang: Language code for the translation pass.

	    Returns:
	        A ``(detected_lang, transcription, trans)`` tuple. ``trans`` is
	        ``None`` when no translation was requested. The tuple always has
	        three items so callers can unpack it unconditionally.

	    Raises:
	        ValueError: If ``url`` is empty or ``None``.
	    """
	    if not url:
	        raise ValueError("A YouTube url is required.")

	    model_m = whisper.load_model("tiny")

	    # Fetch the audio-only stream of the requested video a single time.
	    audio_stream = pytube.YouTube(url).streams.get_audio_only()
	    audio_file = audio_stream.download()

	    # Detect the spoken language on the video that was actually requested
	    # (the clip is padded/trimmed to the window the model expects).
	    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))
	    mel = whisper.log_mel_spectrogram(audio).to(model_m.device)
	    _, probs = model_m.detect_language(mel)
	    detected_lang = max(probs, key=probs.get)

	    transcription = model_m.transcribe(audio_file, fp16=False)["text"]

	    trans = None
	    if translation == "Yes":
	        if tran_lang == "en":
	            # Whisper's "translate" task renders the speech as English text.
	            trans = model_m.transcribe(
	                audio_file, task="translate", fp16=False
	            )["text"]
	        else:
	            # NOTE(review): Whisper only documents translation into
	            # English; forcing the decoding language is kept from the
	            # original as a best effort -- confirm the output quality.
	            trans = model_m.transcribe(
	                audio_file, language=tran_lang, fp16=False
	            )["text"]

	    return detected_lang, transcription, trans

	# --- Streamlit UI: collect the inputs, run the pipeline, show the results ---
	url = st.text_input(label="Please enter the YouTube url: ")
	tran_req = st.selectbox(
	    label="Do you want to translate the transcript?", options=("Yes", "No")
	)

	# The target language is only asked for when a translation is wanted.
	if tran_req == "Yes":
	    lang = st.selectbox(
	        label="Please select the required language: ",
	        options=("en", "fr", "ja"),
	    )
	else:
	    lang = "en"

	if st.button("Generate"):
	    if not url:
	        st.error("Please enter a YouTube url first.")
	    else:
	        # Keep a handle on the bar so later stages update it in place
	        # instead of stacking a new bar for every stage.
	        bar = st.progress(0, text="Fetching the video...")
	        result = predict(url, translation=tran_req, tran_lang=lang)

	        # Accept both a 2-item and a 3-item result so a run without
	        # translation does not fail while unpacking.
	        lang_d, transcription = result[0], result[1]
	        trans = result[2] if len(result) > 2 else None

	        bar.progress(50, text="Speech to Text engine running...")
	        time.sleep(1)  # brief pause so the intermediate stage is visible

	        st.write("Detected language:", lang_d)
	        # TODO: offer the transcript as a downloadable PDF.
	        st.write(transcription)

	        # Only show the translation section when one was produced.
	        if tran_req == "Yes" and trans is not None:
	            bar.progress(75, text="Translation in progress..")
	            time.sleep(1)
	            st.write("Translation: ")
	            st.write(trans)

	        bar.progress(100, text="Completed")
	        st.success("Speech to text converted successfully!")