Spaces:
Runtime error
Runtime error
| import torch | |
| import whisper | |
| import pytube | |
| import librosa | |
| import streamlit as st | |
| import numpy as np | |
| from fpdf import FPDF | |
| from reportlab.pdfgen.canvas import Canvas | |
| import time | |
def predict(url=None, translation="No", tran_lang="en"):
    """Detect the spoken language of a YouTube video and transcribe it.

    The audio-only stream of the video at ``url`` is downloaded once and
    reused for language detection, transcription and the optional second
    decoding pass.

    Args:
        url: Address of the YouTube video to process.
        translation: ``"Yes"`` to run a second decoding pass with
            ``tran_lang`` forced as the language; any other value skips it.
        tran_lang: Whisper language code passed to the second pass.

    Returns:
        A ``(detected_lang, transcription, trans)`` tuple. ``trans`` is
        ``None`` when no translation was requested, so callers can always
        unpack three values.

    Raises:
        ValueError: If ``url`` is empty or ``None``.
    """
    # Fail fast with a clear message before loading the model or touching
    # the network.
    if not url:
        raise ValueError("A YouTube url is required.")

    model_m = whisper.load_model("tiny")

    # Download the audio track of the requested video (not a fixed sample
    # video) so the detected language matches what is transcribed.
    video = pytube.YouTube(url)
    audio_file = video.streams.get_audio_only().download()

    # Language detection operates on a log-Mel spectrogram of the waveform
    # after pad_or_trim has fitted it to the length the model expects.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))
    mel = whisper.log_mel_spectrogram(audio).to(model_m.device)
    _, probs = model_m.detect_language(mel)
    detected_lang = max(probs, key=probs.get)

    transcription = model_m.transcribe(audio_file, fp16=False)["text"]

    trans = None
    if translation == "Yes":
        # NOTE(review): ``language=`` forces the decoding language. Whisper's
        # documented translation mode is ``task="translate"`` (English
        # target only) -- confirm this yields the intended output for
        # non-English targets.
        trans = model_m.transcribe(
            audio_file, language=tran_lang, fp16=False
        )["text"]

    return detected_lang, transcription, trans
# ---------------------------------------------------------------------------
# Streamlit page: collects a YouTube url plus translation options, runs
# predict() on demand and renders the results.
# ---------------------------------------------------------------------------
st.image(
    image="https://www.respeecher.com/hubfs/What-is-Text-to-Speech-TTS%29-Initial-Speech-Synthesis-Explained-Respeecher-voice-cloning-software.jpeg",
    output_format="JPEG",
)
st.title("Sppech to Text generator")
st.write("This app uses an open source neural net called Whisper(developed by OpenAI)")

url = st.text_input(label="Please enter the YouTube url: ")
tran_req = st.selectbox(
    label="Do you want to translate the transcript?",
    options=("Yes", "No"),
)
if tran_req == "Yes":
    lang = st.selectbox(
        label="Please select the required language: ",
        options=("en", "fr", "ja"),
    )
else:
    lang = "en"

if st.button("Generate"):
    if not url or not url.strip():
        # Nothing to download; ask for input instead of failing in predict().
        st.warning("Please enter a YouTube url before generating.")
    else:
        # st.spinner is a context manager: the indicator is only displayed
        # while the body of the ``with`` block is running.
        with st.spinner("Speech to Text engine running..."):
            result = predict(url, translation=tran_req, tran_lang=lang)

        # Accept both a 2-tuple (no translation) and a 3-tuple result so the
        # page never fails on unpacking.
        lang_d, transcription, *extra = result
        trans = extra[0] if extra else None

        st.write("Detected language:", lang_d)
        # TODO: offer the transcript as a downloadable PDF.
        st.write(transcription)

        # Only show the translation section when one was requested and
        # actually produced.
        if tran_req == "Yes" and trans is not None:
            st.write("Translation: ")
            st.write(trans)

        st.success("Speech to text converted successfully!")