speech_to_text / app.py
Photon08's picture
Update app.py
be6e7bb
raw
history blame
2.46 kB
import torch
import whisper
import pytube
import librosa
import streamlit as st
import numpy as np
from fpdf import FPDF
from reportlab.pdfgen.canvas import Canvas
import time
def predict(url=None, translation="No", tran_lang="en"):
    """Transcribe the audio track of a YouTube video with Whisper.

    The audio-only stream of ``url`` is downloaded once and reused for
    language detection, transcription and, when requested, a second
    decoding pass that is given ``tran_lang`` as its language.

    Args:
        url: Address of the YouTube video to process.
        translation: ``"Yes"`` to run the second pass; any other value
            skips it.
        tran_lang: Language code forwarded to Whisper for the second pass.

    Returns:
        A ``(detected_lang, transcription, trans)`` tuple. ``trans`` is
        ``None`` when no translation was requested, so callers can always
        unpack three values.

    Raises:
        ValueError: If ``url`` is empty or ``None``.
    """
    # Fail early with a clear message instead of letting pytube choke on None.
    if not url:
        raise ValueError("A YouTube url is required.")

    model_m = whisper.load_model("tiny")

    # Download the requested video's audio a single time; the same file feeds
    # both the language detector and the transcriber.
    audio_file = pytube.YouTube(url).streams.get_audio_only().download()

    # detect_language works on a log-Mel spectrogram of a padded/trimmed clip.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))
    mel = whisper.log_mel_spectrogram(audio).to(model_m.device)
    _, probs = model_m.detect_language(mel)
    # probs maps language code -> probability; keep the most likely code.
    detected_lang = max(probs, key=probs.get)

    transcription = model_m.transcribe(audio_file, fp16=False)["text"]

    trans = None
    if translation == "Yes":
        # NOTE(review): Whisper documents `language` as the language spoken in
        # the audio and offers translation (into English only) through
        # task="translate". Confirm this call yields the intended output.
        trans = model_m.transcribe(
            audio_file, language=tran_lang, fp16=False
        )["text"]

    return detected_lang, transcription, trans
# Collect the video address and the translation preferences from the user.
url = st.text_input(label="Please enter the YouTube url: ")
tran_req = st.selectbox(
    label="Do you want to translate the transcript?",
    options=("Yes", "No"),
)
# The language picker is rendered only when a translation is wanted;
# otherwise the language falls back to "en".
lang = (
    st.selectbox(
        label="Please select the required language: ",
        options=("en", "fr", "ja"),
    )
    if tran_req == "Yes"
    else "en"
)
# Run the pipeline when the user presses "Generate" and render the results.
if st.button("Generate"):
    if not url:
        # Nothing to download yet; ask for input instead of crashing in pytube.
        st.warning("Please enter a YouTube url first.")
    else:
        # Keep one progress bar and update it, rather than stacking new bars.
        progress_bar = st.progress(0, "Fetching the video...")

        # Starred unpacking tolerates a result with or without the trailing
        # translation element.
        lang_d, transcription, *extra = predict(
            url, translation=tran_req, tran_lang=lang
        )
        trans = extra[0] if extra else None

        progress_bar.progress(50, "Speech to Text engine running...")
        time.sleep(1)
        st.write("Detected language:", lang_d)
        # TODO: offer the transcript as a downloadable PDF.
        st.write(transcription)

        # Only show the translation section when one was actually produced.
        if tran_req == "Yes" and trans is not None:
            progress_bar.progress(75, "Translation in progress..")
            time.sleep(1)
            st.write("Translation: ")
            st.write(trans)

        progress_bar.progress(100, "Completed")
        st.success("Speech to text converted successfully!")