Photon08 committed on
Commit
d4b6fc6
·
1 Parent(s): 7d23870

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import whisper
3
+ import pytube
4
+ import librosa
5
+ import streamlit as st
6
+ import numpy as np
7
+ from fpdf import FPDF
8
+
9
+
10
+
11
def predict(url=None, model="medium", translation="No", tran_lang="en"):
    """Transcribe the audio track of a YouTube video with Whisper.

    Args:
        url: Address of the YouTube video to process.
        model: Name of the Whisper checkpoint handed to ``whisper.load_model``.
        translation: ``"Yes"`` to run a second decoding pass with
            ``language=tran_lang``; any other value skips that pass.
        tran_lang: Language code forwarded to Whisper for the second pass.

    Returns:
        ``(languages, transcription)``, or
        ``(languages, transcription, translated_text)`` when
        ``translation == "Yes"``. ``languages`` is a list of language codes
        ordered from most to least probable for the audio window kept by
        ``whisper.pad_or_trim``.

    Raises:
        ValueError: If ``url`` is missing or blank.
    """
    # Fail fast, before the expensive model load and any network access.
    if not isinstance(url, str) or not url.strip():
        raise ValueError("A YouTube URL is required.")

    model_m = whisper.load_model(model)

    # Fetch the audio-only stream; download() yields the local file path.
    v_data = pytube.YouTube(url.strip())
    speech = v_data.streams.get_audio_only()
    test_audio_file = speech.download()

    # Detect the language of the *downloaded* audio (previously a fixed,
    # unrelated sample file was analysed instead of the requested video).
    audio = whisper.pad_or_trim(whisper.load_audio(test_audio_file))
    # Use the model's own mel-bin count so checkpoints that do not use the
    # default number of bins still receive a correctly shaped spectrogram.
    mel = whisper.log_mel_spectrogram(audio, n_mels=model_m.dims.n_mels)
    mel = mel.to(model_m.device)
    _, probs = model_m.detect_language(mel)

    # Rank the codes by probability; a plain sorted(probs) would only order
    # them alphabetically and throw the detection result away.
    lang_dict = sorted(probs, key=probs.get, reverse=True)

    transcription = model_m.transcribe(test_audio_file, fp16=False)["text"]

    if translation == "Yes":
        # Second pass over the same downloaded audio, decoded as `tran_lang`.
        # NOTE(review): Whisper's documented translation mode is
        # task="translate" (into English); `language` only selects the
        # decoding language -- confirm this matches the intended behaviour.
        trans = model_m.transcribe(
            test_audio_file, language=tran_lang, fp16=False
        )["text"]
        return lang_dict, transcription, trans

    return lang_dict, transcription
38
+
39
# --- Streamlit UI ------------------------------------------------------------
# Collect the video URL and translation preferences; on "Generate", run
# predict(), show the detected languages and offer the transcript as a PDF.

# The prompt is shown as a placeholder: `value` would pre-fill the field and
# the prompt text would then be submitted as part of the URL.
url = st.text_input(
    label="YouTube URL",
    placeholder="Please enter the YouTube url: ",
)
tran_req = st.selectbox(
    label="Do you want to translate the transcript?",
    options=("Yes", "No"),
)

if tran_req == "Yes":
    # "ja" is the language code for Japanese ("jp" is not a language code).
    # NOTE(review): confirm "ga" is accepted by the Whisper model in use.
    lang = st.selectbox(
        label="Please select the required language: ",
        options=("en", "fr", "ja", "bn", "de", "ga"),
    )
else:
    lang = "en"

if st.button("Generate"):
    if not url or not url.strip():
        st.warning("Please enter a YouTube URL.")
    else:
        # predict() returns two values without translation and three with
        # it, so collect the optional third value instead of assuming it.
        lang_d, transcription, *extra = predict(
            url.strip(), translation=tran_req, tran_lang=lang
        )

        st.write(lang_d)
        if extra:
            # Show the translated text rather than silently dropping it.
            st.write(extra[0])

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=15)

        # The built-in Arial font is limited to Latin-1; replace anything
        # else so PDF generation does not fail on other scripts.
        printable = transcription.encode("latin-1", "replace").decode("latin-1")
        # multi_cell wraps the whole transcript across lines; width 0 means
        # "extend to the right margin".
        pdf.multi_cell(0, 10, printable, align="L")

        # Write the PDF to disk, then read the bytes back for the download:
        # the return value of output() is not the file content.
        pdf.output("transcript.pdf")
        with open("transcript.pdf", "rb") as pdf_file:
            pdf_bytes = pdf_file.read()

        st.download_button(
            label="Click here to download the transcript",
            data=pdf_bytes,
            mime="application/pdf",
            file_name="transcript.pdf",
        )