Spaces:
Sleeping
Sleeping
| # coding=utf8 | |
| # Youtube Video Translator | |
| # Developed by Ruslan Magana Vsevolodovna | |
| # https://ruslanmv.com/ | |
| # importing all necessary libraries | |
| import httpcore | |
| #setattr(httpcore, 'SyncHTTPTransport', Any) | |
| import pathlib | |
| import sys, os | |
| from gtts import gTTS | |
| import gradio as gr | |
| import os | |
| import speech_recognition as sr | |
| from googletrans import Translator, constants | |
| from pprint import pprint | |
| from moviepy.editor import * | |
| from pytube import YouTube | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from utils import * | |
| import json | |
| import re | |
| from pytube import YouTube | |
| from yt_dlp import YoutubeDL | |
| from yt_dlp import YoutubeDL | |
| import os | |
| import yt_dlp | |
def download_video(url):
    """
    Download a YouTube video with yt-dlp and return the path of the saved file.

    The best video and audio streams are requested and merged into an MP4 named
    after the video title, written to the current working directory. When a
    ``youtube_cookies.txt`` file is present in the working directory it is
    handed to yt-dlp as the cookie jar.

    :param url: str - URL of the video to download
    :return: str or None - path of the downloaded file, or None if the download failed
    """
    print("Starting download...")
    cookie_path = 'youtube_cookies.txt'
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',  # Ensures best quality
        'merge_output_format': 'mp4',          # Ensures final output is MP4
        'outtmpl': '%(title)s.%(ext)s',        # Saves file with video title
        'quiet': False,                        # Shows progress
        # The Python API reads request headers from 'http_headers'; a
        # 'user-agent' key is a command-line flag name and is ignored here.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        },
    }
    # 'cookiefile' is the Python API name of the command-line --cookies option.
    # Only pass it when the export actually exists.
    if os.path.isfile(cookie_path):
        ydl_opts['cookiefile'] = cookie_path

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            # After post-processing yt-dlp records where each file ended up;
            # prefer that over re-deriving the name from the output template.
            finished = info.get('requested_downloads') or []
            if finished and finished[0].get('filepath'):
                local_file = finished[0]['filepath']
            else:
                local_file = ydl.prepare_filename(info)
        print(f"✅ Downloaded: {local_file}")
        return local_file
    except Exception as e:
        print(f"❌ Download failed: {str(e)}")
        return None
# Example usage.
# NOTE(review): this call runs at import time, so the video below is downloaded
# every time the module is loaded, before the UI is built. The module-level
# name `url` is also rebound to a Gradio Textbox further down the file. This
# looks like leftover debugging code — confirm before removing.
url = "https://www.youtube.com/watch?v=uLVRZE8OAI4"
download_video(url)
def validate_youtube(url):
    """
    Probe a YouTube URL with yt-dlp (metadata only, nothing is downloaded) and
    report whether the video has to be rejected.

    :param url: str - YouTube video URL
    :return: bool - True when the URL cannot be resolved, the duration is
        unknown, or the video runs longer than 10 minutes; False otherwise
    """
    probe_options = {'quiet': True, 'no_warnings': True}
    try:
        with YoutubeDL(probe_options) as probe:
            metadata = probe.extract_info(url, download=False)
            duration_seconds = metadata.get('duration')

        # A missing duration is treated the same way as an invalid video.
        if duration_seconds is None:
            print("Could not determine video length.")
            return True

        too_long = duration_seconds > 600
        if too_long:
            print("Your video is longer than 10 minutes.")
        else:
            print("Your video is 10 minutes or shorter.")
        return too_long
    except Exception as e:
        print(f"Error: The provided URL is invalid or not accessible. ({e})")
        return True
def validate_url(url):
    """
    Check the syntax of *url* with the ``validators`` package.

    :param url: str - URL to check
    :return: bool - True when the URL is rejected by ``validators.url``, False when it is accepted
    """
    import validators

    if validators.url(url):
        return False
    print("Hi there URL seems invalid ")
    return True
def cleanup():
    """
    Delete every ``*.mp4`` and ``*.wav`` entry in the current working directory.

    Deletion is best effort: an entry that cannot be removed is reported and
    skipped, and the remaining entries are still processed.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')  # file types produced by a translation run
    junks = []
    for pattern in patterns:
        junks.extend(glob.glob(pattern))

    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError as err:
            # Handle each entry on its own so one locked or unexpected entry
            # does not leave all the later files behind.
            print(f"I cannot delete the file {junk}: {err}")
def getSize(filename):
    """Return the size of *filename* in bytes."""
    return os.path.getsize(filename)
def clean_transcript(transcript_list):
    """
    Flatten transcript entries into one string, dropping music-only captions.

    :param transcript_list: iterable of mappings that each carry a "text" entry
    :return: str - every kept caption followed by a single space, in order
    """
    # Captions that consist only of a music marker, in the spellings handled so far.
    music_markers = (
        '[music]', '[Music]',
        '[музыка]', '[Музыка]',
        '[musik]', '[Musik]',
        '[musica]', '[Musica]',
        '[música]', '[Música]',
        '[音楽]', '[音乐]',
    )
    captions = (entry["text"] for entry in transcript_list)
    return "".join(caption + " " for caption in captions if caption not in music_markers)
def _video_id_from_url(url):
    """Return the YouTube video id contained in *url* (best effort, never raises for str input)."""
    from urllib.parse import parse_qs, urlparse

    try:
        parts = urlparse(url)
        host = (parts.hostname or "").lower()
    except ValueError:
        parts, host = None, ""

    if parts is not None:
        segments = [segment for segment in parts.path.split("/") if segment]
        # Short links: https://youtu.be/<id>?si=...
        if host.endswith("youtu.be") and segments:
            return segments[0]
        # Standard links: https://www.youtube.com/watch?v=<id>&t=...
        ids = parse_qs(parts.query).get("v")
        if ids:
            return ids[0]
        # Path based links: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
        if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
            return segments[1]

    # Unknown layout: fall back to "everything after the first '='".
    _, separator, tail = url.partition("=")
    return tail if separator else url


def _language_code_of(entry):
    """Return the language code of one translation-language entry."""
    # NOTE(review): entries appear to be dicts in older youtube-transcript-api
    # releases and objects with a `language_code` attribute in newer ones —
    # confirm against the pinned version.
    if isinstance(entry, dict):
        return entry.get("language_code")
    return getattr(entry, "language_code", None)


def _as_raw_entries(fetched):
    """Return a fetched transcript as the list of dicts that clean_transcript reads."""
    # NOTE(review): newer youtube-transcript-api releases seem to return an
    # object exposing to_raw_data() instead of a plain list — confirm.
    to_raw = getattr(fetched, "to_raw_data", None)
    return to_raw() if callable(to_raw) else fetched


def get_transcript(url, desired_language):
    """
    Fetch the transcript of a YouTube video and, when YouTube offers it, its
    translation into *desired_language*.

    A manually created translatable transcript is preferred, then an
    automatically generated translatable one, then the last transcript listed.

    :param url: str - YouTube video URL (watch, youtu.be, shorts, embed, ...)
    :param desired_language: str - language code of the wanted translation, e.g. 'es'
    :return: tuple (script, script_translated, is_translated). When no transcript
        can be listed or fetched the result is (" ", " ", False).
    """
    unavailable = (" ", " ", False)
    id_you = _video_id_from_url(url)
    try:
        # retrieve the available transcripts
        transcript_list = YouTubeTranscriptApi.list_transcripts(id_you)
        available = list(transcript_list)
    except Exception:
        print('TranscriptsDisabled:')
        return unavailable

    print("There are {} avialable scripts".format(len(available)))
    if not available:
        return unavailable

    manual = next((t for t in available if not t.is_generated and t.is_translatable), None)
    automatic = next((t for t in available if t.is_generated and t.is_translatable), None)
    if manual is not None:
        print("Script found manually generated")
        print('We extract manually created transcripts')
        transcript = manual
    elif automatic is not None:
        print("Script found automatic generated")
        print('We extract generated transcript')
        transcript = automatic
    else:
        print('We try find the transcript')
        transcript = available[-1]

    script_translated = ""
    is_translated = False
    try:
        # Check the translation languages of the transcript that was actually
        # selected, not of an unrelated entry in the list.
        offered = transcript.translation_languages if transcript.is_translatable else []
        if any(_language_code_of(entry) == desired_language for entry in offered):
            print("It was found the translation for lang:", desired_language)
            print('We translate directly the transcript')
            translated_entries = transcript.translate(desired_language).fetch()
            script_translated = clean_transcript(_as_raw_entries(translated_entries))
            is_translated = True
        script = clean_transcript(_as_raw_entries(transcript.fetch()))
    except Exception as err:
        # A transcript that is listed but cannot be fetched is reported the
        # same way as a video without transcripts.
        print(f"The transcript could not be fetched: {err}")
        return unavailable
    return script, script_translated, is_translated
# Remember the launch directory, make sure a "temp" folder exists beneath it,
# and publish both locations through the process environment.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")
os.makedirs(temp_dir, exist_ok=True)
os.environ.update(home_dir=home_dir, temp_dir=temp_dir)
# Speech-recognition locale for each supported spoken language.
_RECOGNITION_LOCALES = {
    "English": 'en-US',
    "Italian": 'it-IT',
    "Chinese": 'zh-CN',
    "Spanish": 'es-MX',
    "Russian": 'ru-RU',
    "German": 'de-DE',
    "Japanese": 'ja-JP',
}

# Translation / text-to-speech language code for each supported target language.
_TARGET_LANGUAGE_CODES = {
    "English": 'en',
    "Italian": 'it',
    "Spanish": 'es',
    "Russian": 'ru',
    "German": 'de',
    "Vietnamese": 'vi',
    "Japanese": 'ja',
}


def _speech_to_text(videoclip, lang_in):
    """Transcribe the clip's audio track with Google speech recognition; return None on failure."""
    if videoclip.audio is None:
        print("This video cannot be recognized")
        return None
    # Insert Local Audio File Path
    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
    r = sr.Recognizer()
    with sr.AudioFile("audio.wav") as source:
        # load the whole recording into memory
        audio_data = r.record(source)
    print("Recognize from ", lang_in)
    try:
        # NOTE(review): the original comment mentions a 10 MB request limit,
        # but the threshold applied is 50,000,000 bytes — confirm which is intended.
        if getSize("audio.wav") <= 50000000:
            return r.recognize_google(audio_data, language=lang_in)
        print("The wav is too large")
        pieces = []
        for chunk in split_audio_wav("audio.wav"):
            print("Converting audio to text", chunk)
            # NOTE(review): every iteration recognises the full recording, not
            # `chunk`. Presumably each chunk should be loaded and recognised on
            # its own; split_audio_wav's return type is not visible here, so the
            # existing behaviour is kept — verify against utils.
            pieces.append(r.recognize_google(audio_data, language=lang_in))
        return "".join(piece + " " for piece in pieces)
    except Exception:
        print("This video cannot be recognized")
        return None


def _render_translated_video(videoclip, url, lang_in, lang, home_dir):
    """Replace the clip's audio with synthesized speech in *lang*; return the output path or None on failure."""
    # Trying to get transcripts
    text, trans, is_traduc = get_transcript(url, desired_language=lang)
    if is_traduc:
        print("Transcript Found")
    else:
        print("No Transcript Found")
        # No ready-made translation: recognise the speech, then translate it.
        text = _speech_to_text(videoclip, lang_in)
        if text is None:
            return None
        print("Destination language ", lang)
        # init the Google API translator
        translator = Translator()
        try:
            trans = translator.translate(text, dest=lang).text
        except Exception:
            print("This text cannot be translated")
            return None

    if not trans or not trans.strip():
        # The speech synthesizer needs some text to speak.
        print("This text cannot be translated")
        return None
    try:
        gTTS(text=trans, lang=lang, slow=False).save("audio.wav")
    except Exception as err:
        print(f"The translated audio cannot be generated: {err}")
        return None

    audioclip = AudioFileClip("audio.wav")
    try:
        # adding audio to the video clip
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"
        # Return back to main directory
        os.chdir(home_dir)
        print('Final directory', os.getcwd())
        videoclip.write_videofile(new_video)
    finally:
        audioclip.close()
    return new_video


def video_to_translate(url, initial_language, final_language):
    """
    Download a YouTube video and produce a copy whose audio track is the spoken
    translation of its content.

    The translated text comes from YouTube's own transcript translation when
    available; otherwise the audio is recognised as *initial_language* and
    translated into *final_language*.

    :param url: str - YouTube video URL
    :param initial_language: str - spoken language of the video, e.g. "English"
    :param final_language: str - language to translate into, e.g. "Spanish"
    :return: str - path of the translated video; "./demo/tryagain2.mp4" when the
        URL is rejected by validate_youtube; "./demo/tryagain.mp4" when any
        later step fails
    """
    failure_clip = "./demo/tryagain.mp4"

    print('Checking the url')
    check = validate_youtube(url)
    if check is True:
        return "./demo/tryagain2.mp4"

    # Internal definitions
    lang_in = _RECOGNITION_LOCALES.get(initial_language)
    lang = _TARGET_LANGUAGE_CODES.get(final_language)
    if lang_in is None or lang is None:
        print("Unsupported language selection:", initial_language, final_language)
        return failure_clip

    # Initial directory
    home_dir = os.getenv('home_dir') or os.getcwd()
    print('Initial directory:', home_dir)
    # Cleaning previous files
    cleanup()

    file_obj = download_video(url)
    print(file_obj)
    if not file_obj:
        # download_video reports failures by returning None.
        return failure_clip

    try:
        videoclip = VideoFileClip(file_obj)
    except Exception as err:
        print(f"The downloaded file cannot be opened: {err}")
        cleanup()
        return failure_clip

    try:
        new_video = _render_translated_video(videoclip, url, lang_in, lang, home_dir)
    finally:
        # Close the clip on every path so cleanup() is not blocked by an open file.
        videoclip.close()

    if new_video is None:
        cleanup()
        return failure_clip
    return new_video
# Gradio front end: two language selectors and a URL box feed
# video_to_translate, whose result is shown as a video. The interface is
# launched as soon as this module runs.
initial_language = gr.Dropdown(
    choices=["English", "Italian", "Japanese", "Russian", "Spanish", "German"],
    label="Initial Language",
)
final_language = gr.Dropdown(
    choices=["Russian", "Italian", "Spanish", "German", "English", "Japanese"],
    label="Final Language",
)
url = gr.Textbox(label="Enter the YouTube URL below:")
gr.Interface(
    fn=video_to_translate,
    inputs=[url, initial_language, final_language],
    outputs="video",
    title="Video YouTube Translator",
    # The target list names every language offered by the "Final Language" dropdown.
    description="A simple application that translates YouTube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, German, English, and Japanese. Wait one minute to process.",
    article="""<div>
<p style="text-align: center"> All you need to do is to paste the YouTube link and hit submit, then wait for compiling. After that, click on Play/Pause to listen to the video. The video is saved in an MP4 format.
The length video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>.
</p>
</div>""",
    examples=[
        ["https://youtu.be/uLVRZE8OAI4?si=LA08t9hUJHLYg8K_", "English", "Spanish"],
    ],
).launch()