""" NewPress AI - Johnny Harris Script Assistant A Gradio app that uses a Supabase vector database of Johnny Harris transcripts to: 1. Search if topics have been covered before 2. Generate scripts in Johnny's voice from bullet points """ import os import gradio as gr from dotenv import load_dotenv from src.vectorstore import TranscriptVectorStore, create_vectorstore from src.llm_client import InferenceProviderClient, create_llm_client from src.prompts import ( TOPIC_SEARCH_SYSTEM_PROMPT, SCRIPT_SYSTEM_PROMPT, TONE_CHECK_SYSTEM_PROMPT, get_topic_search_prompt, get_script_prompt, get_tone_check_prompt ) # Load environment variables load_dotenv() # Initialize components (lazy loading) vectorstore = None llm_client = None def get_vectorstore() -> TranscriptVectorStore: """Get or create the vector store instance""" global vectorstore if vectorstore is None: vectorstore = create_vectorstore() return vectorstore def get_llm_client() -> InferenceProviderClient: """Get or create the LLM client instance""" global llm_client if llm_client is None: llm_client = create_llm_client() return llm_client # ============================================================================= # TAB 1: TOPIC SEARCH # ============================================================================= def expand_query(query: str) -> list: """Use LLM to generate related search terms for broader coverage""" try: llm = get_llm_client() prompt = f"""Given this search query about Johnny Harris video topics: "{query}" Generate 2-3 closely related search terms that might find relevant videos. Focus on: the core topic, key entities mentioned, and one closely related concept. Return ONLY the terms, one per line, no numbering or explanation.""" response = llm.generate(prompt, max_tokens=60, temperature=0.3) terms = [t.strip() for t in response.strip().split('\n') if t.strip()] return [query] + terms[:3] except Exception: return [query] def search_topics(query: str, progress=gr.Progress()): """ Generator that yields progress updates during search. Uses tiered results: direct matches and related content. Args: query: User's topic or question progress: Gradio progress tracker Yields: Progress status messages, then final search results """ if not query or not query.strip(): yield "Please enter a topic or question to search." return try: vs = get_vectorstore() # Expand query using LLM progress(0.1, desc="Expanding search query...") yield "Expanding search query..." search_terms = expand_query(query.strip()) # Collect tiered results from all search terms all_direct = [] all_related = [] seen_videos = set() total_terms = len(search_terms) for i, term in enumerate(search_terms): pct = 0.2 + (0.5 * (i / total_terms)) progress(pct, desc=f"Searching: {term[:30]}...") yield f"Searching: {term[:30]}..." direct, related = vs.tiered_similarity_search( query=term, direct_threshold=0.6, related_threshold=0.3, max_per_tier=10 ) # Add results, deduplicating by video for chunk in direct: if chunk.video_id not in seen_videos: seen_videos.add(chunk.video_id) all_direct.append(chunk) for chunk in related: if chunk.video_id not in seen_videos: seen_videos.add(chunk.video_id) all_related.append(chunk) progress(0.8, desc="Processing results...") yield "Processing results..." # Sort each tier by similarity all_direct = sorted(all_direct, key=lambda x: x.similarity, reverse=True)[:10] all_related = sorted(all_related, key=lambda x: x.similarity, reverse=True)[:10] if not all_direct and not all_related: yield f"No matching content found for: **{query}**\n\nThis topic may not have been covered yet, or try rephrasing your search." return # Format tiered output output_parts = [] search_info = f"*Searched: {', '.join(search_terms)}*\n\n" output_parts.append(f"## Search Results for: \"{query}\"\n\n{search_info}") if all_direct: output_parts.append("### Direct Matches\nVideos that directly cover this topic:\n") output_parts.append(vs.format_results_for_display(all_direct)) if all_related: if all_direct: output_parts.append("\n---\n") output_parts.append("### Related Content\nVideos that touch on similar themes:\n") output_parts.append(vs.format_results_for_display(all_related)) progress(1.0, desc="Done!") yield "\n".join(output_parts) except Exception as e: yield f"Error searching: {str(e)}" # ============================================================================= # TAB 2: TONE CHECKER # ============================================================================= def check_script_tone(user_script: str, progress=gr.Progress()): """ Generator that yields progress updates during tone analysis. Args: user_script: User's script to analyze progress: Gradio progress tracker Yields: Progress status messages, then final tone analysis """ if not user_script or not user_script.strip(): yield "Please enter a script to analyze." return try: progress(0.05, desc="Gathering style references...") yield "Gathering style references from Johnny's archive..." vs = get_vectorstore() llm = get_llm_client() progress(0.15, desc="Searching knowledge base...") yield "Searching knowledge base for style references..." context_chunks = vs.get_bulk_style_context( topic_query=user_script.strip()[:500], # Use first 500 chars as topic hint max_chunks=50, topic_relevant_ratio=0.4 ) progress(0.35, desc="Preparing context...") yield "Preparing context for analysis..." context = vs.format_context_for_llm(context_chunks) if context_chunks else "" progress(0.5, desc="Building prompt...") yield "Building analysis prompt..." prompt_template = get_tone_check_prompt() prompt = prompt_template.format( user_script=user_script.strip(), context=context ) progress(0.7, desc="Analyzing tone (30-60 seconds)...") yield "Analyzing script tone (this may take 30-60 seconds)..." analysis = llm.generate( prompt=prompt, system_prompt=TONE_CHECK_SYSTEM_PROMPT, temperature=0.3, max_tokens=1500 ) progress(1.0, desc="Complete!") yield analysis.strip() except Exception as e: yield f"**Error:** {str(e)}" # ============================================================================= # GRADIO INTERFACE # ============================================================================= def create_app(): """Create and configure the Gradio application""" with gr.Blocks( title="NewPress AI - Johnny Harris Script Assistant" ) as app: app.queue() # Enable queue before defining event handlers for progress to work gr.Markdown(""" # NewPress AI ### Johnny Harris Script Assistant Use Johnny's archive of hundreds of video transcripts to: - **Search** if a topic has been covered before - **Generate** scripts in Johnny's voice from your notes """) with gr.Tabs(): # ================================================================= # TAB 1: TOPIC SEARCH # ================================================================= with gr.TabItem("Topic Search"): gr.Markdown(""" ### Has Johnny covered this topic? Search the archive to see if a topic has been addressed in previous videos. """) with gr.Row(): with gr.Column(scale=3): topic_input = gr.Textbox( label="Topic or Question", placeholder="e.g., Why do borders exist? or US immigration policy", lines=2 ) with gr.Column(scale=1): search_btn = gr.Button("Search", variant="primary", size="lg") search_output = gr.Markdown(label="Search Results", value="Search results will appear here...") search_btn.click( fn=search_topics, inputs=[topic_input], outputs=[search_output], show_progress="full" ) topic_input.submit( fn=search_topics, inputs=[topic_input], outputs=[search_output], show_progress="full" ) # ================================================================= # TAB 2: TONE CHECKER # ================================================================= with gr.TabItem("Tone Checker"): gr.Markdown(""" ### Check if your script matches Johnny's voice Paste your script below to analyze how well it matches Johnny Harris's signature style. Get a score and specific feedback on what works and what to improve. """) with gr.Row(): with gr.Column(): script_input = gr.Textbox( label="Your Script", placeholder="""Paste your script here... Example: There's this line on the map that most people have never heard of. It's called the Durand Line, and it cuts right through the middle of a people who have lived in these mountains for thousands of years. The thing is, this line wasn't drawn by the people who live here...""", lines=15 ) check_btn = gr.Button("Check Tone", variant="primary", size="lg") tone_output = gr.Markdown(label="Tone Analysis", value="Tone analysis will appear here...") check_btn.click( fn=check_script_tone, inputs=[script_input], outputs=[tone_output], show_progress="full" ) script_input.submit( fn=check_script_tone, inputs=[script_input], outputs=[tone_output], show_progress="full" ) gr.Markdown(""" --- *Powered by Johnny Harris's transcript archive, Jina AI embeddings, and Qwen-2.5-72B* """) return app # ============================================================================= # MAIN # ============================================================================= # Create app at module level for `gradio app.py` CLI compatibility demo = create_app() if __name__ == "__main__": demo.launch( server_name="0.0.0.0", server_port=7860, share=False )