"""Gradio front-end for the Web RAG service.

The UI talks to a FastAPI backend (see ``API_BASE_URL``) that exposes three
POST endpoints -- ``/extract_links``, ``/extract_text`` and ``/rag`` -- plus a
``GET /`` endpoint used here as a health check.
"""

import os
import time

import gradio as gr
import requests

# Use localhost for HF Spaces since both services run in the same container
API_BASE_URL = "http://localhost:8000"

# How many of the discovered links are sent to the backend for text extraction.
MAX_LINKS = 5

# Radio-button labels; process_web_rag() branches on the first one.
MULTI_LINK_CHOICE = f"Multiple links (first {MAX_LINKS})"
HOMEPAGE_CHOICE = "Homepage only"


class ApiError(Exception):
    """Raised when the backend is unreachable or answers with a non-200 status.

    Subclasses ``Exception`` so existing ``except Exception`` callers keep
    working.
    """


def _post_json(path, payload, timeout, action):
    """POST ``payload`` as JSON to ``API_BASE_URL + path`` and return the parsed body.

    Args:
        path: Endpoint path starting with ``/``.
        payload: JSON-serialisable request body.
        timeout: Request timeout in seconds.
        action: Short phrase used in the failure message ("Failed to <action>").

    Raises:
        ApiError: On any ``requests`` error or a non-200 response.
    """
    try:
        response = requests.post(
            f"{API_BASE_URL}{path}", json=payload, timeout=timeout
        )
        if response.status_code == 200:
            return response.json()
    except requests.exceptions.RequestException as e:
        # Chain the cause so the original traceback is not lost.
        raise ApiError(f"Connection error: {str(e)}") from e
    raise ApiError(f"Failed to {action}: {response.text}")


def extract_links(url):
    """Extract links from the given URL.

    Returns the ``unique_links`` field of the ``/extract_links`` response.
    """
    return _post_json("/extract_links", {"url": url}, 30, "extract links")[
        "unique_links"
    ]


def extract_text(urls):
    """Extract text from URLs.

    ``urls`` is sent as the raw JSON body. Returns the ``file_saved`` field of
    the ``/extract_text`` response.
    """
    return _post_json("/extract_text", urls, 60, "extract text")["file_saved"]


def perform_rag(file_path, prompt):
    """Perform RAG on the extracted text and return the decoded JSON response."""
    payload = {"file_path": file_path, "prompt": prompt}
    return _post_json("/rag", payload, 60, "perform RAG")


def check_api_health():
    """Check if FastAPI is running.

    Returns True only when ``GET /`` answers with status 200; any ``requests``
    failure (refused connection, timeout, ...) is reported as False.
    """
    try:
        response = requests.get(f"{API_BASE_URL}/", timeout=5)
    except requests.exceptions.RequestException:
        return False
    return response.status_code == 200


def _format_sources(sources):
    """Render ``sources`` for a textbox: sequences become one entry per line.

    Any other value (e.g. an already formatted string) is returned unchanged.
    NOTE(review): the backend's ``sources`` schema is not visible here -- confirm.
    """
    if isinstance(sources, (list, tuple)):
        return "\n".join(str(item) for item in sources)
    return sources


def process_web_rag(url, prompt, data_source, progress=gr.Progress()):
    """Main processing function with progress tracking.

    Args:
        url: Page to analyse.
        prompt: Question to ask about the extracted content.
        data_source: Selected radio label; ``MULTI_LINK_CHOICE`` crawls up to
            ``MAX_LINKS`` links found on the page, anything else processes only
            ``url`` itself.
        progress: Gradio progress tracker (the default instance is how Gradio
            injects it into event handlers).

    Returns:
        A ``(status, response, sources)`` tuple for the three output boxes.
        Failures are reported in ``status`` with the other two left empty.
    """
    # Whitespace-only input is as useless as empty input.
    url = (url or "").strip()
    prompt = (prompt or "").strip()
    if not url or not prompt:
        return "❌ Error: Please provide both URL and prompt", "", ""

    # Check API health first
    if not check_api_health():
        return (
            "❌ Error: FastAPI service is not available. "
            "Please wait a moment and try again.",
            "",
            "",
        )

    try:
        progress(0.1, desc="Starting process...")

        if data_source == MULTI_LINK_CHOICE:
            progress(0.2, desc="🔍 Extracting links from webpage...")
            links = extract_links(url)
            if not links:
                # Avoid asking the backend to extract text from zero pages.
                return (
                    "❌ Error: No links were found on this page. "
                    "Try the 'Homepage only' option.",
                    "",
                    "",
                )
            sample_links = links[:MAX_LINKS]

            progress(0.4, desc="📄 Extracting text from multiple pages...")
            file_path = extract_text(sample_links)
            status_msg = (
                f"✅ Processed {len(sample_links)} pages "
                f"from {len(links)} total links found"
            )
        else:
            progress(0.3, desc="📄 Extracting text from homepage...")
            file_path = extract_text([url])
            status_msg = "✅ Processed homepage content"

        progress(0.7, desc="🤖 Performing RAG analysis...")
        result = perform_rag(file_path, prompt)

        progress(1.0, desc="✅ Complete!")

        # Format the response
        response_text = (
            f"**Query:** {result['user_query']}\n\n"
            f"**Response:** {result['assistant_response']}"
        )
        sources_text = _format_sources(result['sources'])

        return status_msg, response_text, sources_text

    except Exception as e:
        # UI boundary: surface every failure in the status box instead of crashing.
        return f"❌ Error: {str(e)}", "", ""


# Custom CSS for modern styling
custom_css = """
.gradio-container {
    max-width: 900px !important;
    margin: auto !important;
}
.header-text {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 0.5em;
}
.description-text {
    text-align: center;
    color: #666;
    font-size: 1.1em;
    margin-bottom: 2em;
}
.input-group {
    background: #f8f9fa;
    padding: 1.5em;
    border-radius: 12px;
    margin: 1em 0;
    border: 1px solid #e9ecef;
}
.output-group {
    background: #ffffff;
    border-radius: 12px;
    border: 1px solid #dee2e6;
    margin: 1em 0;
}
.status-box {
    padding: 1em;
    border-radius: 8px;
    margin: 0.5em 0;
}
.status-success {
    background-color: #d4edda;
    border-color: #c3e6cb;
    color: #155724;
}
.status-error {
    background-color: #f8d7da;
    border-color: #f5c6cb;
    color: #721c24;
}
"""

# Create the Gradio interface
with gr.Blocks(css=custom_css, title="Web RAG System", theme=gr.themes.Soft()) as app:
    # Header -- styled by the .header-text / .description-text rules in custom_css.
    gr.HTML("""
        <div class="header-text">🌐 Web RAG System</div>
        <div class="description-text">
            Extract content from web pages and ask questions using AI-powered retrieval
        </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.HTML("<h3>📝 Input Configuration</h3>")

            url_input = gr.Textbox(
                label="🔗 Website URL",
                placeholder="https://example.com",
                info="Enter the URL you want to analyze",
            )

            prompt_input = gr.Textbox(
                label="❓ Your Question",
                placeholder="What is this website about?",
                lines=3,
                info="Ask any question about the content",
            )

            data_source = gr.Radio(
                choices=[MULTI_LINK_CHOICE, HOMEPAGE_CHOICE],
                value=MULTI_LINK_CHOICE,
                label="📊 Data Source",
                info="Choose how much content to analyze",
            )

            process_btn = gr.Button(
                "🚀 Analyze Website",
                variant="primary",
                size="lg",
            )

    # Output section
    gr.HTML("<h3>📋 Results</h3>")

    status_output = gr.Textbox(
        label="📊 Processing Status",
        interactive=False,
        show_label=True,
    )

    with gr.Row():
        with gr.Column(scale=2):
            response_output = gr.Textbox(
                label="🤖 AI Response",
                lines=8,
                interactive=False,
                show_label=True,
            )

        with gr.Column(scale=1):
            sources_output = gr.Textbox(
                label="📚 Sources",
                lines=8,
                interactive=False,
                show_label=True,
            )

    # Example section -- explicit block tags keep each line on its own row,
    # since bare newlines collapse when rendered as HTML.
    gr.HTML("""
        <div>
            <h4>💡 Example Usage</h4>
            <p><strong>URL:</strong> https://openai.com</p>
            <p><strong>Question:</strong> What are the main products and services offered?</p>
            <p><strong>Data Source:</strong> Multiple links (first 5)</p>
        </div>
    """)

    # Add a note about the system status
    gr.HTML("""
        <div>
            <p><strong>ℹ️ Note:</strong> If you encounter connection errors, please wait a moment for the system to initialize and try again.</p>
        </div>
    """)

    # Connect the function: the button click and pressing Enter in either
    # textbox (keyboard shortcut) all run the same handler.
    for trigger in (process_btn.click, url_input.submit, prompt_input.submit):
        trigger(
            fn=process_web_rag,
            inputs=[url_input, prompt_input, data_source],
            outputs=[status_output, response_output, sources_output],
            show_progress=True,
        )

if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        quiet=False,
    )