Spaces:

Photon08
/

speech_to_text

Runtime error

File size: 2,462 Bytes

d4b6fc6
 
 
 
 
 
 
fb8c0b6
667630b
d4b6fc6
 
 
0ca3a16
d4b6fc6
0ca3a16
d4b6fc6
8407d4b
 
 
 
 
6f66c35
d4b6fc6
 
 
 
 
 
 
33186cb
 
 
 
 
 
 
 
d4b6fc6
 
 
 
 
 
 
 
 
8508782
33186cb
d4b6fc6
 
 
667630b
d4b6fc6
 
 
13aaabd
d4b6fc6
 
 
 
be6e7bb
d4b6fc6
667630b
e01eec8
667630b
 
 
5f57e08
 
 
 
 
e01eec8
667630b
5f57e08
667630b
 
e01eec8
667630b

import torch
import whisper
import pytube
import librosa
import streamlit as st
import numpy as np
from fpdf import FPDF
from reportlab.pdfgen.canvas import Canvas
import time



def predict(url=None, translation="No",tran_lang="en"):
    """Transcribe the audio track of a YouTube video with Whisper.

    The audio of ``url`` is downloaded once into a temporary directory and
    reused for language detection, transcription and the optional second
    pass; the temporary directory is removed before returning.

    Args:
        url: Address of the YouTube video to process. Required.
        translation: ``"Yes"`` to also run the second pass with the decoding
            language forced to ``tran_lang``; any other value skips it.
        tran_lang: Language code passed as ``language=`` to the second pass.

    Returns:
        ``(detected_lang, transcription, trans)`` when ``translation`` is
        ``"Yes"``, otherwise ``(detected_lang, transcription)``.
        ``detected_lang`` is the language code with the highest probability
        reported by ``model.detect_language``.

    Raises:
        ValueError: If ``url`` is empty/``None`` or the video exposes no
            audio-only stream.
    """
    # Validate before loading the model or touching the network.
    if not url:
        raise ValueError("A YouTube url is required.")

    import tempfile

    model_m = whisper.load_model("tiny")

    audio_stream = pytube.YouTube(url).streams.get_audio_only()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    # Download into a scratch directory so repeated runs do not leave media
    # files behind in the working directory.
    with tempfile.TemporaryDirectory() as workdir:
        audio_file = audio_stream.download(output_path=workdir)

        # Detect the language of the *requested* video, not a fixed sample.
        detected_lang = _detect_language(model_m, audio_file)

        transcription = model_m.transcribe(audio_file, fp16=False)["text"]

        if translation == "Yes":
            # NOTE(review): Whisper's documented translation path is
            # task="translate" (English output only). Forcing `language=` is
            # kept from the original code -- confirm it yields the intended
            # output for non-English targets.
            trans = model_m.transcribe(
                audio_file, language=tran_lang, fp16=False
            )["text"]
            return detected_lang, transcription, trans

    return detected_lang, transcription


def _detect_language(model, audio_path):
    """Return the most probable language code for the audio at ``audio_path``."""
    # pad_or_trim fits the waveform to the fixed window the model expects.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_path))
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    return max(probs, key=probs.get)

# Collect the video link and the translation preference from the user.
url = st.text_input(label="Please enter the YouTube url: ")
tran_req = st.selectbox(
    label="Do you want to translate the transcript?",
    options=("Yes", "No"),
)

# The target-language picker is only rendered when a translation is
# requested; otherwise the language falls back to "en".
lang = (
    st.selectbox(
        label="Please select the required language: ",
        options=("en", "fr", "ja"),
    )
    if tran_req == "Yes"
    else "en"
)

# Run the pipeline when the user clicks "Generate" and render the results.
if st.button("Generate"):
    if not url:
        st.warning("Please enter a YouTube url first.")
    else:
        # Keep a handle on the bar so later stages update it in place
        # instead of stacking a new progress bar for every stage.
        progress_bar = st.progress(0, "Fetching the video...")

        # predict() may return two or three items depending on whether a
        # translation was requested; tolerate both shapes.
        lang_d, transcription, *extra = predict(
            url, translation=tran_req, tran_lang=lang
        )
        trans = extra[0] if extra else None

        progress_bar.progress(50, "Speech to Text engine running...")
        time.sleep(1)  # brief pause so the intermediate stage is visible

        st.write("Detected language:", lang_d)
        # TODO: offer the transcript as a downloadable PDF.
        st.write(transcription)

        # Only show the translation section when one was actually produced.
        if trans is not None:
            progress_bar.progress(75, "Translation in progress..")
            time.sleep(1)
            st.write("Translation: ")
            st.write(trans)

        progress_bar.progress(100, "Completed")
        st.success("Speech to text converted successfully!")