Spaces:
Runtime error
Runtime error
File size: 2,462 Bytes
d4b6fc6 fb8c0b6 667630b d4b6fc6 0ca3a16 d4b6fc6 0ca3a16 d4b6fc6 8407d4b 6f66c35 d4b6fc6 33186cb d4b6fc6 8508782 33186cb d4b6fc6 667630b d4b6fc6 13aaabd d4b6fc6 be6e7bb d4b6fc6 667630b e01eec8 667630b 5f57e08 e01eec8 667630b 5f57e08 667630b e01eec8 667630b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import torch
import whisper
import pytube
import librosa
import streamlit as st
import numpy as np
from fpdf import FPDF
from reportlab.pdfgen.canvas import Canvas
import time
def predict(url=None, translation="No", tran_lang="en"):
    """Detect the spoken language of a YouTube video and transcribe it.

    The audio track of ``url`` is downloaded once with pytube and reused for
    language detection, transcription and (optionally) the second decoding
    pass.

    Args:
        url: Address of the YouTube video to process.
        translation: ``"Yes"`` to run a second Whisper pass decoded with
            ``tran_lang``; any other value skips that pass.
        tran_lang: Language code passed as ``language=`` to the second pass.

    Returns:
        A ``(detected_lang, transcription, trans)`` tuple. ``detected_lang``
        is the language code with the highest detection probability,
        ``transcription`` is the text of the default transcription, and
        ``trans`` is the text of the second pass, or ``None`` when
        ``translation`` is not ``"Yes"``.

    Raises:
        ValueError: If ``url`` is ``None`` or blank.
    """
    # Validate before loading the model so a bad request fails fast.
    if not url or not url.strip():
        raise ValueError("A YouTube url is required.")
    video_url = url.strip()

    model_m = whisper.load_model("tiny")

    # Fetch the audio-only stream of the requested video a single time.
    audio_stream = pytube.YouTube(video_url).streams.get_audio_only()
    audio_file = audio_stream.download()

    # Language detection works on one fixed-size window of the audio, so the
    # waveform is padded/trimmed before the mel spectrogram is computed.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))
    mel = whisper.log_mel_spectrogram(audio).to(model_m.device)
    _, probs = model_m.detect_language(mel)
    detected_lang = max(probs, key=probs.get)

    transcription = model_m.transcribe(audio_file, fp16=False)["text"]

    trans = None
    if translation == "Yes":
        # NOTE(review): this forces the decoding language rather than using
        # Whisper's task="translate" (which targets English only) - confirm
        # this yields the intended translation for non-English targets.
        trans = model_m.transcribe(
            audio_file, language=tran_lang, fp16=False
        )["text"]

    # Always return three values so callers can unpack unconditionally.
    return detected_lang, transcription, trans
# --- Streamlit UI -----------------------------------------------------------
# Collect the video URL and translation preferences, then run the pipeline
# when the user presses "Generate".
url = st.text_input(label="Please enter the YouTube url: ")
tran_req = st.selectbox(
    label="Do you want to translate the transcript?",
    options=("Yes", "No"),
)
if tran_req == "Yes":
    lang = st.selectbox(
        label="Please select the required language: ",
        options=("en", "fr", "ja"),
    )
else:
    lang = "en"

if st.button("Generate"):
    if not url.strip():
        # Nothing to process; ask for input instead of failing downstream.
        st.warning("Please enter a YouTube url first.")
    else:
        # One progress bar that is updated in place, rather than a new
        # widget per stage.
        progress_bar = st.progress(0, "Fetching the video...")
        result = predict(url, translation=tran_req, tran_lang=lang)

        # predict may hand back two or three values depending on whether a
        # translation was produced; tolerate both shapes.
        lang_d, transcription, *extra = result
        trans = extra[0] if extra else None

        progress_bar.progress(50, "Speech to Text engine running...")
        time.sleep(1)
        st.write("Detected language:", lang_d)
        # TODO: offer the transcript as a downloadable PDF.
        st.write(transcription)

        # Only show the translation section when one was actually produced.
        if trans is not None:
            progress_bar.progress(75, "Translation in progress..")
            time.sleep(1)
            st.write("Translation: ")
            st.write(trans)

        progress_bar.progress(100, "Completed")
        st.success("Speech to text converted successfully!")