Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import time | |
| import os | |
# Base URL of the FastAPI backend. It defaults to localhost because on
# HF Spaces both services run in the same container; set the API_BASE_URL
# environment variable to point the UI at a backend running elsewhere.
# An empty variable falls back to the default, and a trailing slash is dropped
# so that f"{API_BASE_URL}/endpoint" never produces a double slash.
API_BASE_URL = (os.environ.get("API_BASE_URL") or "http://localhost:8000").rstrip("/")
def extract_links(url):
    """Ask the backend's ``/extract_links`` endpoint for the links found at *url*.

    Args:
        url: Address of the page to scan; sent as ``{"url": url}`` in the
            JSON request body.

    Returns:
        The value stored under ``"unique_links"`` in the JSON response.

    Raises:
        Exception: If the request fails at the transport level, the service
            answers with a status other than 200, or the response body is
            not JSON containing a ``"unique_links"`` entry.
    """
    endpoint = f"{API_BASE_URL}/extract_links"
    payload = {"url": url}

    # Only the network call lives inside this ``try`` so that a malformed
    # response body is not misreported as a connection problem.
    try:
        response = requests.post(endpoint, json=payload, timeout=30)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to extract links: {response.text}")

    try:
        return response.json()["unique_links"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: JSON of the wrong shape.
        raise Exception(
            f"Failed to extract links: unexpected response format ({e!r})"
        ) from e
def extract_text(urls):
    """Send *urls* to the backend's ``/extract_text`` endpoint.

    Args:
        urls: Page addresses to fetch; posted unchanged as the JSON request
            body (callers in this file pass a list of URL strings).

    Returns:
        The value stored under ``"file_saved"`` in the JSON response.

    Raises:
        Exception: If the request fails at the transport level, the service
            answers with a status other than 200, or the response body is
            not JSON containing a ``"file_saved"`` entry.
    """
    endpoint = f"{API_BASE_URL}/extract_text"

    # Only the network call lives inside this ``try`` so that a malformed
    # response body is not misreported as a connection problem.
    try:
        response = requests.post(endpoint, json=urls, timeout=60)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to extract text: {response.text}")

    try:
        return response.json()["file_saved"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: JSON of the wrong shape.
        raise Exception(
            f"Failed to extract text: unexpected response format ({e!r})"
        ) from e
def perform_rag(file_path, prompt):
    """Run a RAG query through the backend's ``/rag`` endpoint.

    Args:
        file_path: Location of the previously extracted text, as returned by
            the text-extraction step.
        prompt: Question to answer from that text.

    Returns:
        The decoded JSON response. The caller in this file reads the
        ``"user_query"``, ``"assistant_response"`` and ``"sources"`` entries
        from it.

    Raises:
        Exception: If the request fails at the transport level, the service
            answers with a status other than 200, or the response body is
            not valid JSON.
    """
    endpoint = f"{API_BASE_URL}/rag"
    payload = {"file_path": file_path, "prompt": prompt}

    # Only the network call lives inside this ``try`` so that a malformed
    # response body is not misreported as a connection problem.
    try:
        response = requests.post(endpoint, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to perform RAG: {response.text}")

    try:
        return response.json()
    except ValueError as e:
        raise Exception(
            f"Failed to perform RAG: response is not valid JSON ({e!r})"
        ) from e
def check_api_health():
    """Report whether the FastAPI backend answers on its root path.

    Returns:
        ``True`` when ``GET {API_BASE_URL}/`` responds with status 200 within
        the five-second timeout; ``False`` for any other status or when the
        request itself fails.
    """
    try:
        response = requests.get(f"{API_BASE_URL}/", timeout=5)
    except requests.exceptions.RequestException:
        # Catch only request failures: a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        return False
    return response.status_code == 200
def process_web_rag(url, prompt, data_source, progress=gr.Progress()):
    """Run the extract-then-RAG pipeline for one request, reporting progress.

    Args:
        url: Page to analyse. Surrounding whitespace is ignored.
        prompt: Question to ask about the extracted content.
        data_source: ``"Multiple links (first 5)"`` to process up to five of
            the links found on *url*; any other value processes only *url*.
        progress: Progress tracker, called with a fraction and a ``desc``
            at each stage. The ``gr.Progress()`` default is kept as-is.

    Returns:
        A ``(status, response, sources)`` tuple of values for the three
        output boxes. On any failure the status carries the error message
        and the other two entries are empty strings.
    """
    # NOTE(review): the leading "β" / "π" characters in the messages below look
    # like mis-decoded emoji — confirm the file's encoding before changing them.

    # Whitespace-only input is as unusable as empty input.
    url = (url or "").strip()
    if not url or not (prompt or "").strip():
        return "β Error: Please provide both URL and prompt", "", ""

    # Fail fast with a friendly message if the backend is not up yet.
    if not check_api_health():
        return "β Error: FastAPI service is not available. Please wait a moment and try again.", "", ""

    try:
        progress(0.1, desc="Starting process...")

        if data_source == "Multiple links (first 5)":
            progress(0.2, desc="π Extracting links from webpage...")
            links = extract_links(url)
            if not links:
                # Nothing to crawl: say so instead of sending an empty list
                # downstream and reporting "Processed 0 pages".
                return "β Error: No links were found on the provided page", "", ""
            sample_links = links[:5]

            progress(0.4, desc="π Extracting text from multiple pages...")
            file_path = extract_text(sample_links)
            status_msg = f"β Processed {len(sample_links)} pages from {len(links)} total links found"
        else:
            progress(0.3, desc="π Extracting text from homepage...")
            file_path = extract_text([url])
            status_msg = "β Processed homepage content"

        progress(0.7, desc="π€ Performing RAG analysis...")
        result = perform_rag(file_path, prompt)
        progress(1.0, desc="β Complete!")

        # Format the response for the output boxes.
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        sources_text = result['sources']
        return status_msg, response_text, sources_text
    except Exception as e:
        # UI boundary: turn any pipeline failure into a status message
        # rather than letting it propagate out of the event handler.
        return f"β Error: {str(e)}", "", ""
# Custom CSS for modern styling.
# This stylesheet is handed to ``gr.Blocks(css=custom_css, ...)`` when the
# interface is created. The ``header-text`` and ``description-text`` classes
# are used by the header HTML in this file; the remaining classes
# (``input-group``, ``output-group``, ``status-*``) are not referenced by any
# markup visible in this file.
custom_css = """
.gradio-container {
max-width: 900px !important;
margin: auto !important;
}
.header-text {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5em;
font-weight: bold;
margin-bottom: 0.5em;
}
.description-text {
text-align: center;
color: #666;
font-size: 1.1em;
margin-bottom: 2em;
}
.input-group {
background: #f8f9fa;
padding: 1.5em;
border-radius: 12px;
margin: 1em 0;
border: 1px solid #e9ecef;
}
.output-group {
background: #ffffff;
border-radius: 12px;
border: 1px solid #dee2e6;
margin: 1em 0;
}
.status-box {
padding: 1em;
border-radius: 8px;
margin: 0.5em 0;
}
.status-success {
background-color: #d4edda;
border-color: #c3e6cb;
color: #155724;
}
.status-error {
background-color: #f8d7da;
border-color: #f5c6cb;
color: #721c24;
}
"""
# Assemble the Gradio interface: header, input form, result boxes, help text,
# and the event wiring that runs ``process_web_rag``.
# NOTE(review): block nesting below was reconstructed from a copy that had lost
# its indentation — confirm that the results section sits outside the input
# column as laid out here.
with gr.Blocks(css=custom_css, title="Web RAG System", theme=gr.themes.Soft()) as app:
    # Page banner
    gr.HTML("""
<div class="header-text">π Web RAG System</div>
<div class="description-text">
Extract content from web pages and ask questions using AI-powered retrieval
</div>
""")

    with gr.Row():
        with gr.Column(scale=1):
            # --- Inputs ---
            gr.HTML('<div style="font-size: 1.2em; font-weight: bold; margin-bottom: 1em;">π Input Configuration</div>')
            url_input = gr.Textbox(
                label="π Website URL",
                placeholder="https://example.com",
                info="Enter the URL you want to analyze",
            )
            prompt_input = gr.Textbox(
                label="β Your Question",
                placeholder="What is this website about?",
                lines=3,
                info="Ask any question about the content",
            )
            data_source = gr.Radio(
                choices=["Multiple links (first 5)", "Homepage only"],
                value="Multiple links (first 5)",
                label="π Data Source",
                info="Choose how much content to analyze",
            )
            process_btn = gr.Button("π Analyze Website", variant="primary", size="lg")

    # --- Outputs ---
    gr.HTML('<div style="font-size: 1.2em; font-weight: bold; margin: 2em 0 1em 0;">π Results</div>')
    status_output = gr.Textbox(
        label="π Processing Status",
        interactive=False,
        show_label=True,
    )
    with gr.Row():
        with gr.Column(scale=2):
            response_output = gr.Textbox(
                label="π€ AI Response",
                lines=8,
                interactive=False,
                show_label=True,
            )
        with gr.Column(scale=1):
            sources_output = gr.Textbox(
                label="π Sources",
                lines=8,
                interactive=False,
                show_label=True,
            )

    # --- Static help: a worked example ---
    gr.HTML("""
<div style="margin-top: 2em; padding: 1.5em; background: #f8f9fa; border-radius: 12px; border-left: 4px solid #667eea;">
<h3 style="margin-top: 0; color: #333;">π‘ Example Usage</h3>
<p><strong>URL:</strong> https://openai.com</p>
<p><strong>Question:</strong> What are the main products and services offered?</p>
<p><strong>Data Source:</strong> Multiple links (first 5)</p>
</div>
""")

    # --- Static help: what to do on connection errors ---
    gr.HTML("""
<div style="margin-top: 1em; padding: 1em; background: #e3f2fd; border-radius: 8px; border-left: 4px solid #2196f3;">
<p style="margin: 0; color: #0d47a1;">
βΉοΈ <strong>Note:</strong> If you encounter connection errors, please wait a moment for the system to initialize and try again.
</p>
</div>
""")

    # --- Event wiring ---
    # The button click and a submit from either textbox all run the same
    # pipeline with the same inputs and outputs, so register them in one pass.
    handler_options = dict(
        fn=process_web_rag,
        inputs=[url_input, prompt_input, data_source],
        outputs=[status_output, response_output, sources_output],
        show_progress=True,
    )
    for register in (process_btn.click, url_input.submit, prompt_input.submit):
        register(**handler_options)
if __name__ == "__main__":
    # Start the UI only when this file is executed directly; the settings
    # below are passed verbatim to ``app.launch``.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "quiet": False,
    }
    app.launch(**launch_options)