|
|
import os
import sys

import gradio as gr
from dotenv import load_dotenv

from tool import YouTubeTranscriptExtractor, TranscriptSummarizer, TranscriptToDocx
|
|
|
|
|
|
|
|
# Load environment variables (e.g. GEMINI_API_KEY, read further down when the
# UI is built) before anything else touches the environment.
load_dotenv()

# Tool instances are created once at import time and shared by every request
# handled by process_youtube_video.
youtube_tool = YouTubeTranscriptExtractor()
docx_tool = TranscriptToDocx()

# Print the interpreter version at startup to help diagnose deployment issues.
print(sys.version)
|
|
def _resolve_existing_docx(upload):
    """Return the on-disk path of an optional uploaded DOCX, or None.

    Accepts a path string, an object exposing a ``name`` attribute, or a list
    whose first element is one of those. Anything else, or a path that does
    not exist, yields None.
    """
    if isinstance(upload, list):
        # Only the first entry of a list is considered.
        upload = upload[0] if upload else None
    if upload is None:
        return None
    path = upload if isinstance(upload, str) else getattr(upload, "name", None)
    if path and os.path.exists(path):
        return path
    return None


def _split_language(transcript_result):
    """Split a ``LANGUAGE:<code>||<transcript>`` result into (transcript, language).

    Falls back to language ``"en"`` when the prefix is missing, empty, or the
    result cannot be parsed.
    """
    try:
        if "LANGUAGE:" in transcript_result and "||" in transcript_result:
            header, _, transcript = transcript_result.partition("||")
            language = header.replace("LANGUAGE:", "").strip()
            if language:
                print(f"Detected language: {language}")
                return transcript, language
            # A prefix with no language code is treated like a missing prefix.
            print("Warning: Could not detect language, using English as default")
            return transcript, "en"
    except (TypeError, AttributeError) as e:
        # Raised when the extractor returned something that is not a string.
        print(f"Warning: Error parsing transcript data: {str(e)}, using English as default")
        if isinstance(transcript_result, str):
            return transcript_result, "en"
        return "Error extracting transcript", "en"

    print("Warning: Could not detect language, using English as default")
    return transcript_result, "en"


def _split_summary_and_image(summary_and_blog):
    """Split summarizer output into (summary, image_url).

    The image reference is expected after a trailing ``"\\n\\nImage URL: "``
    marker. The LAST marker is used, so summaries that themselves contain the
    marker text no longer lose their image. Returns ``image_url=None`` when no
    marker (or an empty URL) is present or the output is not a string.
    """
    marker = "\n\nImage URL: "
    if isinstance(summary_and_blog, str) and marker in summary_and_blog:
        summary, _, image_url = summary_and_blog.rpartition(marker)
        return summary, (image_url.strip() or None)
    return summary_and_blog, None


def process_youtube_video(video_url, hf_api_key, gemini_api_key, model_name, existing_docx_path=None):
    """Extract a video's transcript, summarize it, and export the result to DOCX.

    Args:
        video_url: URL of the YouTube video to process.
        hf_api_key: Hugging Face API key, forwarded to ``TranscriptSummarizer``.
        gemini_api_key: Gemini API key, forwarded to ``TranscriptSummarizer``.
        model_name: Model name, forwarded to ``TranscriptSummarizer``.
        existing_docx_path: Optional existing DOCX to update; may be a path
            string, an object with a ``name`` attribute, or a list of either.

    Returns:
        A tuple ``(transcript, summary, image_url, docx_path)`` where
        ``image_url`` is None when the summarizer output carries no image
        reference and ``docx_path`` is whatever ``docx_tool.forward`` returns.
    """
    summarizer_tool = TranscriptSummarizer(
        hf_api_key=hf_api_key,
        gemini_api_key=gemini_api_key,
        model_name=model_name,
    )

    # The title lookup is best effort: the import lives inside the try so a
    # missing or failing pytubefix degrades to the default title instead of
    # aborting the whole request.
    try:
        from pytubefix import YouTube

        video_title = YouTube(video_url).title or "YouTube Video"
    except Exception as e:
        print(f"Warning: Could not fetch video title: {str(e)}")
        video_title = "YouTube Video"

    transcript_result = youtube_tool.forward(video_url=video_url)
    transcript, language = _split_language(transcript_result)

    summary_and_blog = summarizer_tool.forward(transcript=transcript, language=language)
    summary, image_url = _split_summary_and_image(summary_and_blog)

    docx_path = docx_tool.forward(
        transcript=transcript,
        summary=summary,
        video_title=video_title,
        image_path=image_url,
        existing_docx_path=_resolve_existing_docx(existing_docx_path),
    )

    return transcript, summary, image_url, docx_path
|
|
|
|
|
# Gradio front end: inputs in the left column, results in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# YouTube Transcript Summarizer and Blog Content Generator")
    gr.Markdown(
        "Enter a YouTube video URL and Hugging Face API Key to extract the "
        "transcript, summarize it, and generate blog content with an image "
        "and DOCX file. Optionally, you can provide an existing DOCX file to "
        "update."
    )

    gr.Markdown("### API Keys and Model Configuration")
    gr.Markdown(
        "You need to provide both a Hugging Face API key (for image generation) "
        "and a Gemini API key (for summarization). You can now enter your Gemini "
        "API key directly in the UI or set it in the .env file."
    )

    with gr.Row():
        # Left column: everything the user supplies.
        with gr.Column():
            video_url = gr.Textbox(label="YouTube Video URL")

            gr.Markdown("#### API Keys")
            hf_api_key = gr.Textbox(label="Hugging Face API Key", type="password")
            # Pre-filled from the environment when GEMINI_API_KEY is set.
            gemini_api_key = gr.Textbox(
                label="Gemini API Key",
                type="password",
                value=os.environ.get("GEMINI_API_KEY", ""),
            )

            model_name = gr.Dropdown(
                label="Gemini Model",
                choices=["gemini-2.0-flash", "gemini-1.5-pro", "gemini-1.5-flash"],
                value="gemini-2.0-flash",
            )

            existing_docx = gr.File(
                label="Existing DOCX file (optional)",
                file_types=[".docx"],
            )
            submit_btn = gr.Button("Process Video")

        # Right column: everything process_youtube_video returns.
        with gr.Column():
            transcript_output = gr.Textbox(label="Transcript")
            summary_output = gr.Textbox(label="Summary and Blog Content")
            image_output = gr.Image(label="Generated Image", image_mode="RGBA")
            docx_output = gr.File(label="Generated DOCX File")

    # Input order matches the parameter order of process_youtube_video; output
    # order matches the tuple it returns.
    submit_btn.click(
        fn=process_youtube_video,
        inputs=[video_url, hf_api_key, gemini_api_key, model_name, existing_docx],
        outputs=[transcript_output, summary_output, image_output, docx_output],
    )

# Alias for the Blocks app, kept under the name `iface`.
iface = demo

iface.launch()
|
|
|