Spaces:
Running
Running
| """ | |
| NewPress AI - Johnny Harris Script Assistant | |
| A Gradio app that uses a Supabase vector database of Johnny Harris transcripts to: | |
| 1. Search if topics have been covered before | |
| 2. Generate scripts in Johnny's voice from bullet points | |
| """ | |
| import os | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from src.vectorstore import TranscriptVectorStore, create_vectorstore | |
| from src.llm_client import InferenceProviderClient, create_llm_client | |
| from src.prompts import ( | |
| TOPIC_SEARCH_SYSTEM_PROMPT, | |
| SCRIPT_SYSTEM_PROMPT, | |
| TONE_CHECK_SYSTEM_PROMPT, | |
| get_topic_search_prompt, | |
| get_script_prompt, | |
| get_tone_check_prompt | |
| ) | |
| # Load environment variables | |
| load_dotenv() | |
| # Initialize components (lazy loading) | |
| vectorstore = None | |
| llm_client = None | |
def get_vectorstore() -> TranscriptVectorStore:
    """Return the process-wide vector store, building it lazily on first call."""
    global vectorstore
    if vectorstore is not None:
        return vectorstore
    vectorstore = create_vectorstore()
    return vectorstore
def get_llm_client() -> InferenceProviderClient:
    """Return the process-wide LLM client, building it lazily on first call."""
    global llm_client
    if llm_client is not None:
        return llm_client
    llm_client = create_llm_client()
    return llm_client
| # ============================================================================= | |
| # TAB 1: TOPIC SEARCH | |
| # ============================================================================= | |
def expand_query(query: str) -> list:
    """Use the LLM to generate related search terms for broader coverage.

    Args:
        query: The user's original search query.

    Returns:
        A list of unique search terms that always starts with the original
        query, followed by up to three LLM-suggested related terms.  On any
        failure the function degrades gracefully to ``[query]`` so the
        caller can still run a plain search.
    """
    try:
        llm = get_llm_client()
        prompt = f"""Given this search query about Johnny Harris video topics: "{query}"
Generate 2-3 closely related search terms that might find relevant videos.
Focus on: the core topic, key entities mentioned, and one closely related concept.
Return ONLY the terms, one per line, no numbering or explanation."""
        response = llm.generate(prompt, max_tokens=60, temperature=0.3)
        terms = [t.strip() for t in response.strip().split('\n') if t.strip()]
        # The LLM often echoes the query itself (or repeats a term); dedupe
        # case-insensitively so downstream code doesn't run the same vector
        # search twice.
        unique_terms = []
        seen = {query.lower()}
        for term in terms:
            key = term.lower()
            if key not in seen:
                seen.add(key)
                unique_terms.append(term)
        return [query] + unique_terms[:3]
    except Exception:
        # Expansion is a best-effort optimization — never let it block search.
        return [query]
def search_topics(query: str, progress=gr.Progress()):
    """
    Stream progress updates while running a tiered topic search.

    Expands the query into related terms via the LLM, runs a tiered
    similarity search per term, keeps at most one chunk per video
    (direct tier wins), then yields a markdown report.

    Args:
        query: User's topic or question
        progress: Gradio progress tracker

    Yields:
        Progress status messages, then final search results
    """
    if not query or not query.strip():
        yield "Please enter a topic or question to search."
        return
    try:
        store = get_vectorstore()

        # Broaden coverage with LLM-suggested sibling terms.
        progress(0.1, desc="Expanding search query...")
        yield "Expanding search query..."
        terms = expand_query(query.strip())

        direct_hits = []
        related_hits = []
        seen = set()
        term_count = len(terms)

        for idx, term in enumerate(terms):
            progress(0.2 + (0.5 * (idx / term_count)), desc=f"Searching: {term[:30]}...")
            yield f"Searching: {term[:30]}..."
            direct, related = store.tiered_similarity_search(
                query=term,
                direct_threshold=0.6,
                related_threshold=0.3,
                max_per_tier=10
            )
            # Deduplicate by video across both tiers; direct tier wins.
            for tier_chunks, bucket in ((direct, direct_hits), (related, related_hits)):
                for chunk in tier_chunks:
                    if chunk.video_id in seen:
                        continue
                    seen.add(chunk.video_id)
                    bucket.append(chunk)

        progress(0.8, desc="Processing results...")
        yield "Processing results..."

        # Rank within each tier by similarity, keep the top 10.
        direct_hits.sort(key=lambda c: c.similarity, reverse=True)
        related_hits.sort(key=lambda c: c.similarity, reverse=True)
        direct_hits = direct_hits[:10]
        related_hits = related_hits[:10]

        if not (direct_hits or related_hits):
            yield f"No matching content found for: **{query}**\n\nThis topic may not have been covered yet, or try rephrasing your search."
            return

        # Assemble the tiered markdown report.
        sections = [
            f"## Search Results for: \"{query}\"\n\n"
            f"*Searched: {', '.join(terms)}*\n\n"
        ]
        if direct_hits:
            sections.append("### Direct Matches\nVideos that directly cover this topic:\n")
            sections.append(store.format_results_for_display(direct_hits))
        if related_hits:
            if direct_hits:
                sections.append("\n---\n")
            sections.append("### Related Content\nVideos that touch on similar themes:\n")
            sections.append(store.format_results_for_display(related_hits))

        progress(1.0, desc="Done!")
        yield "\n".join(sections)
    except Exception as e:
        yield f"Error searching: {str(e)}"
| # ============================================================================= | |
| # TAB 2: TONE CHECKER | |
| # ============================================================================= | |
def check_script_tone(user_script: str, progress=gr.Progress()):
    """
    Stream progress updates while analyzing a script's tone.

    Retrieves style-reference chunks from the transcript archive, builds
    the tone-check prompt around the user's script, and yields the LLM's
    analysis as the final message.

    Args:
        user_script: User's script to analyze
        progress: Gradio progress tracker

    Yields:
        Progress status messages, then final tone analysis
    """
    if not user_script or not user_script.strip():
        yield "Please enter a script to analyze."
        return
    try:
        progress(0.05, desc="Gathering style references...")
        yield "Gathering style references from Johnny's archive..."
        store = get_vectorstore()
        llm = get_llm_client()

        progress(0.15, desc="Searching knowledge base...")
        yield "Searching knowledge base for style references..."
        script = user_script.strip()
        # The first 500 characters act as the topic hint for retrieval.
        chunks = store.get_bulk_style_context(
            topic_query=script[:500],
            max_chunks=50,
            topic_relevant_ratio=0.4
        )

        progress(0.35, desc="Preparing context...")
        yield "Preparing context for analysis..."
        context = store.format_context_for_llm(chunks) if chunks else ""

        progress(0.5, desc="Building prompt...")
        yield "Building analysis prompt..."
        prompt = get_tone_check_prompt().format(
            user_script=script,
            context=context
        )

        progress(0.7, desc="Analyzing tone (30-60 seconds)...")
        yield "Analyzing script tone (this may take 30-60 seconds)..."
        analysis = llm.generate(
            prompt=prompt,
            system_prompt=TONE_CHECK_SYSTEM_PROMPT,
            temperature=0.3,
            max_tokens=1500
        )

        progress(1.0, desc="Complete!")
        yield analysis.strip()
    except Exception as e:
        yield f"**Error:** {str(e)}"
| # ============================================================================= | |
| # GRADIO INTERFACE | |
| # ============================================================================= | |
def create_app():
    """Create and configure the Gradio application.

    Builds a two-tab Blocks UI (Topic Search and Tone Checker), wires the
    streaming generator handlers to their inputs/outputs, and returns the
    app unlaunched so the caller decides how to serve it.
    """
    with gr.Blocks(
        title="NewPress AI - Johnny Harris Script Assistant"
    ) as app:
        app.queue()  # Enable queue before defining event handlers for progress to work
        # App header shown above both tabs.
        gr.Markdown("""
        # NewPress AI
        ### Johnny Harris Script Assistant
        Use Johnny's archive of hundreds of video transcripts to:
        - **Search** if a topic has been covered before
        - **Generate** scripts in Johnny's voice from your notes
        """)
        with gr.Tabs():
            # =================================================================
            # TAB 1: TOPIC SEARCH
            # =================================================================
            with gr.TabItem("Topic Search"):
                gr.Markdown("""
                ### Has Johnny covered this topic?
                Search the archive to see if a topic has been addressed in previous videos.
                """)
                with gr.Row():
                    with gr.Column(scale=3):
                        topic_input = gr.Textbox(
                            label="Topic or Question",
                            placeholder="e.g., Why do borders exist? or US immigration policy",
                            lines=2
                        )
                    with gr.Column(scale=1):
                        search_btn = gr.Button("Search", variant="primary", size="lg")
                # Markdown output is overwritten by each yield from the
                # search_topics generator (status lines, then final report).
                search_output = gr.Markdown(label="Search Results", value="Search results will appear here...")
                # Same handler for button click and Enter-key submit.
                search_btn.click(
                    fn=search_topics,
                    inputs=[topic_input],
                    outputs=[search_output],
                    show_progress="full"
                )
                topic_input.submit(
                    fn=search_topics,
                    inputs=[topic_input],
                    outputs=[search_output],
                    show_progress="full"
                )
            # =================================================================
            # TAB 2: TONE CHECKER
            # =================================================================
            with gr.TabItem("Tone Checker"):
                gr.Markdown("""
                ### Check if your script matches Johnny's voice
                Paste your script below to analyze how well it matches Johnny Harris's
                signature style. Get a score and specific feedback on what works and what to improve.
                """)
                with gr.Row():
                    with gr.Column():
                        script_input = gr.Textbox(
                            label="Your Script",
                            placeholder="""Paste your script here...
Example:
There's this line on the map that most people have never heard of.
It's called the Durand Line, and it cuts right through the middle of a people
who have lived in these mountains for thousands of years.
The thing is, this line wasn't drawn by the people who live here...""",
                            lines=15
                        )
                check_btn = gr.Button("Check Tone", variant="primary", size="lg")
                # Overwritten by each yield from the check_script_tone generator.
                tone_output = gr.Markdown(label="Tone Analysis", value="Tone analysis will appear here...")
                check_btn.click(
                    fn=check_script_tone,
                    inputs=[script_input],
                    outputs=[tone_output],
                    show_progress="full"
                )
                script_input.submit(
                    fn=check_script_tone,
                    inputs=[script_input],
                    outputs=[tone_output],
                    show_progress="full"
                )
        # Footer shown below both tabs.
        gr.Markdown("""
        ---
        *Powered by Johnny Harris's transcript archive, Jina AI embeddings, and Qwen-2.5-72B*
        """)
    return app
| # ============================================================================= | |
| # MAIN | |
| # ============================================================================= | |
# Create app at module level for `gradio app.py` CLI compatibility
demo = create_app()
if __name__ == "__main__":
    # Bind to all interfaces (required inside containers / HF Spaces);
    # 7860 is Gradio's conventional default port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )