import gradio as gr import requests import time import os # Use localhost for HF Spaces since both services run in the same container API_BASE_URL = "http://localhost:8000" def extract_links(url): """Extract links from the given URL""" endpoint = f"{API_BASE_URL}/extract_links" payload = {"url": url} try: response = requests.post(endpoint, json=payload, timeout=30) if response.status_code == 200: return response.json()["unique_links"] else: raise Exception(f"Failed to extract links: {response.text}") except requests.exceptions.RequestException as e: raise Exception(f"Connection error: {str(e)}") def extract_text(urls): """Extract text from URLs""" endpoint = f"{API_BASE_URL}/extract_text" try: response = requests.post(endpoint, json=urls, timeout=60) if response.status_code == 200: return response.json()["file_saved"] else: raise Exception(f"Failed to extract text: {response.text}") except requests.exceptions.RequestException as e: raise Exception(f"Connection error: {str(e)}") def perform_rag(file_path, prompt): """Perform RAG on the extracted text""" endpoint = f"{API_BASE_URL}/rag" payload = {"file_path": file_path, "prompt": prompt} try: response = requests.post(endpoint, json=payload, timeout=60) if response.status_code == 200: return response.json() else: raise Exception(f"Failed to perform RAG: {response.text}") except requests.exceptions.RequestException as e: raise Exception(f"Connection error: {str(e)}") def check_api_health(): """Check if FastAPI is running""" try: response = requests.get(f"{API_BASE_URL}/", timeout=5) return response.status_code == 200 except: return False def process_web_rag(url, prompt, data_source, progress=gr.Progress()): """Main processing function with progress tracking""" if not url or not prompt: return "â Error: Please provide both URL and prompt", "", "" # Check API health first if not check_api_health(): return "â Error: FastAPI service is not available. 
Please wait a moment and try again.", "", "" try: progress(0.1, desc="Starting process...") if data_source == "Multiple links (first 5)": progress(0.2, desc="đ Extracting links from webpage...") links = extract_links(url) sample_links = links[:5] progress(0.4, desc="đ Extracting text from multiple pages...") file_path = extract_text(sample_links) status_msg = f"â Processed {len(sample_links)} pages from {len(links)} total links found" else: progress(0.3, desc="đ Extracting text from homepage...") file_path = extract_text([url]) status_msg = "â Processed homepage content" progress(0.7, desc="đ¤ Performing RAG analysis...") result = perform_rag(file_path, prompt) progress(1.0, desc="â Complete!") # Format the response response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}" sources_text = result['sources'] return status_msg, response_text, sources_text except Exception as e: return f"â Error: {str(e)}", "", "" # Custom CSS for modern styling custom_css = """ .gradio-container { max-width: 900px !important; margin: auto !important; } .header-text { text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 2.5em; font-weight: bold; margin-bottom: 0.5em; } .description-text { text-align: center; color: #666; font-size: 1.1em; margin-bottom: 2em; } .input-group { background: #f8f9fa; padding: 1.5em; border-radius: 12px; margin: 1em 0; border: 1px solid #e9ecef; } .output-group { background: #ffffff; border-radius: 12px; border: 1px solid #dee2e6; margin: 1em 0; } .status-box { padding: 1em; border-radius: 8px; margin: 0.5em 0; } .status-success { background-color: #d4edda; border-color: #c3e6cb; color: #155724; } .status-error { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; } """ # Create the Gradio interface with gr.Blocks(css=custom_css, title="Web RAG System", theme=gr.themes.Soft()) as app: # Header 
gr.HTML("""
URL: https://openai.com
Question: What are the main products and services offered?
Data Source: Multiple links (first 5)
ℹ️ Note: If you encounter connection errors, please wait a moment for the system to initialize and try again.