# Source: Hugging Face Space MohamedFahim/URL-To-Answer (status: Sleeping)
# File size: 8,550 bytes; revision ee16852

import gradio as gr
import requests
import time
import os

# Both services run in the same container on HF Spaces, so localhost is the
# default. Set the API_BASE_URL environment variable to target another host.
# An unset or empty variable falls back to the default, and a trailing slash
# is dropped because endpoints are built as f"{API_BASE_URL}/<route>".
API_BASE_URL = (os.environ.get("API_BASE_URL") or "http://localhost:8000").rstrip("/")

def extract_links(url):
    """Ask the backend API for the unique links found on a web page.

    Posts ``{"url": url}`` as JSON to ``{API_BASE_URL}/extract_links`` with a
    30-second timeout.

    Args:
        url: Address of the page to scan for links.

    Returns:
        The value stored under ``"unique_links"`` in the JSON response.

    Raises:
        Exception: If the request fails at the transport level
            ("Connection error: ..."), the service answers with a non-200
            status, or the body is not JSON containing ``"unique_links"``.
            The underlying error, when there is one, is chained as
            ``__cause__``.
    """
    endpoint = f"{API_BASE_URL}/extract_links"
    payload = {"url": url}

    # Only the network call lives in this try-block so that transport
    # failures are the only thing reported as "Connection error".
    try:
        response = requests.post(endpoint, json=payload, timeout=30)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to extract links: {response.text}")

    # A 200 reply can still carry a non-JSON body or lack the expected key;
    # report that as an extraction failure rather than leaking a KeyError.
    try:
        return response.json()["unique_links"]
    except (ValueError, KeyError, TypeError) as e:
        raise Exception(
            f"Failed to extract links: unexpected response format ({e!r})"
        ) from e

def extract_text(urls):
    """Ask the backend API to extract text from the given URLs.

    Posts ``urls`` as the JSON body to ``{API_BASE_URL}/extract_text`` with a
    60-second timeout.

    Args:
        urls: The URLs to process; callers in this file pass a list of
            URL strings.

    Returns:
        The value stored under ``"file_saved"`` in the JSON response.

    Raises:
        Exception: If the request fails at the transport level
            ("Connection error: ..."), the service answers with a non-200
            status, or the body is not JSON containing ``"file_saved"``.
            The underlying error, when there is one, is chained as
            ``__cause__``.
    """
    endpoint = f"{API_BASE_URL}/extract_text"

    # Only the network call lives in this try-block so that transport
    # failures are the only thing reported as "Connection error".
    try:
        response = requests.post(endpoint, json=urls, timeout=60)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to extract text: {response.text}")

    # A 200 reply can still carry a non-JSON body or lack the expected key;
    # report that as an extraction failure rather than leaking a KeyError.
    try:
        return response.json()["file_saved"]
    except (ValueError, KeyError, TypeError) as e:
        raise Exception(
            f"Failed to extract text: unexpected response format ({e!r})"
        ) from e

def perform_rag(file_path, prompt):
    """Run the backend RAG endpoint against previously extracted text.

    Posts ``{"file_path": file_path, "prompt": prompt}`` as JSON to
    ``{API_BASE_URL}/rag`` with a 60-second timeout.

    Args:
        file_path: Value returned by ``extract_text`` identifying the
            extracted text on the backend.
        prompt: The user's question.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the request fails at the transport level
            ("Connection error: ..."), the service answers with a non-200
            status, or the body cannot be decoded as JSON. The underlying
            error, when there is one, is chained as ``__cause__``.
    """
    endpoint = f"{API_BASE_URL}/rag"
    payload = {"file_path": file_path, "prompt": prompt}

    # Only the network call lives in this try-block so that transport
    # failures are the only thing reported as "Connection error".
    try:
        response = requests.post(endpoint, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to perform RAG: {response.text}")

    # JSON decoding errors are ValueError subclasses; surface them as a RAG
    # failure instead of mislabelling them as a connection problem.
    try:
        return response.json()
    except ValueError as e:
        raise Exception(
            f"Failed to perform RAG: response was not valid JSON ({e!r})"
        ) from e

def check_api_health():
    """Report whether the backend API answers on its root route.

    Issues a GET to ``{API_BASE_URL}/`` with a 5-second timeout.

    Returns:
        True if the response status is 200; False if the status differs or
        the request fails with a ``requests`` exception.
    """
    try:
        response = requests.get(f"{API_BASE_URL}/", timeout=5)
    except requests.exceptions.RequestException:
        # Catch only request failures: a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        return False
    return response.status_code == 200

def process_web_rag(url, prompt, data_source, progress=gr.Progress()):
    """Run the full extract-then-RAG pipeline for the Gradio UI.

    Args:
        url: Website address entered by the user.
        prompt: The user's question about the site.
        data_source: ``"Multiple links (first 5)"`` to follow up to five
            links discovered on the page; any other value processes only
            the given URL.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            kept in the signature deliberately; it is how the tracker is
            requested from Gradio, so do not replace it with ``None``.

    Returns:
        A ``(status, response, sources)`` tuple for the three output
        textboxes. On any failure the status starts with "❌ Error:" and
        the other two items are empty strings.
    """
    # Treat None and whitespace-only values as missing input.
    url = (url or "").strip()
    prompt = (prompt or "").strip()
    if not url or not prompt:
        return "❌ Error: Please provide both URL and prompt", "", ""

    # Check API health first
    if not check_api_health():
        return "❌ Error: FastAPI service is not available. Please wait a moment and try again.", "", ""

    try:
        progress(0.1, desc="Starting process...")

        if data_source == "Multiple links (first 5)":
            progress(0.2, desc="🔍 Extracting links from webpage...")
            links = extract_links(url)
            if not links:
                # Nothing to crawl: stop here rather than sending an empty
                # list to the extractor and reporting "Processed 0 pages".
                return (
                    "❌ Error: No links were found on the provided page. "
                    "Try the \"Homepage only\" data source.",
                    "",
                    "",
                )
            sample_links = links[:5]

            progress(0.4, desc="📄 Extracting text from multiple pages...")
            file_path = extract_text(sample_links)

            status_msg = f"✅ Processed {len(sample_links)} pages from {len(links)} total links found"
        else:
            progress(0.3, desc="📄 Extracting text from homepage...")
            file_path = extract_text([url])
            status_msg = "✅ Processed homepage content"

        progress(0.7, desc="🤖 Performing RAG analysis...")
        result = perform_rag(file_path, prompt)

        progress(1.0, desc="✅ Complete!")

        # Format the response
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"

        # NOTE(review): the backend schema for 'sources' is not visible here.
        # If it is a list, show one entry per line; otherwise pass it through
        # unchanged.
        sources = result['sources']
        if isinstance(sources, (list, tuple)):
            sources_text = "\n".join(str(item) for item in sources)
        else:
            sources_text = sources

        return status_msg, response_text, sources_text

    except Exception as e:
        # UI boundary: turn any pipeline failure into a status message
        # instead of letting it propagate out of the event handler.
        return f"❌ Error: {str(e)}", "", ""

# Stylesheet passed to gr.Blocks(css=...) when the interface is built.
# .gradio-container caps the page width and centres it. .header-text and
# .description-text are used by the page header HTML. The remaining classes
# (.input-group, .output-group, .status-*) are not referenced in the visible
# part of this file.
custom_css = """
.gradio-container {
    max-width: 900px !important;
    margin: auto !important;
}

.header-text {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 0.5em;
}

.description-text {
    text-align: center;
    color: #666;
    font-size: 1.1em;
    margin-bottom: 2em;
}

.input-group {
    background: #f8f9fa;
    padding: 1.5em;
    border-radius: 12px;
    margin: 1em 0;
    border: 1px solid #e9ecef;
}

.output-group {
    background: #ffffff;
    border-radius: 12px;
    border: 1px solid #dee2e6;
    margin: 1em 0;
}

.status-box {
    padding: 1em;
    border-radius: 8px;
    margin: 0.5em 0;
}

.status-success {
    background-color: #d4edda;
    border-color: #c3e6cb;
    color: #155724;
}

.status-error {
    background-color: #f8d7da;
    border-color: #f5c6cb;
    color: #721c24;
}
"""

# Build the Gradio UI: header, input column, result boxes, help notes, and
# the event bindings that run process_web_rag.
with gr.Blocks(
    css=custom_css,
    title="Web RAG System",
    theme=gr.themes.Soft(),
) as app:
    # Page title and tagline (styled by .header-text / .description-text)
    gr.HTML("""
        <div class="header-text">🌐 Web RAG System</div>
        <div class="description-text">
            Extract content from web pages and ask questions using AI-powered retrieval
        </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # --- Inputs ---
            gr.HTML('<div style="font-size: 1.2em; font-weight: bold; margin-bottom: 1em;">📝 Input Configuration</div>')

            url_input = gr.Textbox(
                label="🔗 Website URL",
                info="Enter the URL you want to analyze",
                placeholder="https://example.com",
            )

            prompt_input = gr.Textbox(
                label="❓ Your Question",
                info="Ask any question about the content",
                placeholder="What is this website about?",
                lines=3,
            )

            # The first choice is the default and is the value that
            # process_web_rag compares against for multi-page mode.
            data_source = gr.Radio(
                label="📊 Data Source",
                info="Choose how much content to analyze",
                choices=["Multiple links (first 5)", "Homepage only"],
                value="Multiple links (first 5)",
            )

            process_btn = gr.Button("🚀 Analyze Website", variant="primary", size="lg")

    # --- Outputs ---
    gr.HTML('<div style="font-size: 1.2em; font-weight: bold; margin: 2em 0 1em 0;">📋 Results</div>')

    status_output = gr.Textbox(label="📊 Processing Status", show_label=True, interactive=False)

    with gr.Row():
        with gr.Column(scale=2):
            response_output = gr.Textbox(
                label="🤖 AI Response",
                show_label=True,
                interactive=False,
                lines=8,
            )

        with gr.Column(scale=1):
            sources_output = gr.Textbox(
                label="📚 Sources",
                show_label=True,
                interactive=False,
                lines=8,
            )

    # Worked example shown beneath the results
    gr.HTML("""
        <div style="margin-top: 2em; padding: 1.5em; background: #f8f9fa; border-radius: 12px; border-left: 4px solid #667eea;">
            <h3 style="margin-top: 0; color: #333;">💡 Example Usage</h3>
            <p><strong>URL:</strong> https://openai.com</p>
            <p><strong>Question:</strong> What are the main products and services offered?</p>
            <p><strong>Data Source:</strong> Multiple links (first 5)</p>
        </div>
    """)

    # Hint about transient connection errors during start-up
    gr.HTML("""
        <div style="margin-top: 1em; padding: 1em; background: #e3f2fd; border-radius: 8px; border-left: 4px solid #2196f3;">
            <p style="margin: 0; color: #0d47a1;">
                ℹ️ <strong>Note:</strong> If you encounter connection errors, please wait a moment for the system to initialize and try again.
            </p>
        </div>
    """)

    # The button click and pressing Enter in either textbox all run the same
    # pipeline with the same inputs and outputs, registered in the original
    # order: button, URL box, question box.
    for register_event in (process_btn.click, url_input.submit, prompt_input.submit):
        register_event(
            fn=process_web_rag,
            inputs=[url_input, prompt_input, data_source],
            outputs=[status_output, response_output, sources_output],
            show_progress=True,
        )

if __name__ == "__main__":
    # Start the UI only when run as a script: bind to all interfaces on port
    # 7860, without a public share link, with errors shown in the UI and
    # normal startup logging.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "quiet": False,
    }
    app.launch(**launch_options)