Spaces:

tomvaillant
/

osint-llm

Sleeping

File size: 6,602 Bytes

"""
OSINT Investigation Assistant - Gradio App

A RAG-powered assistant that helps investigators develop methodologies
for OSINT investigations using a database of 344+ OSINT tools.
"""

import os
import gradio as gr
from dotenv import load_dotenv
from src.rag_pipeline import create_pipeline

# Load environment variables
load_dotenv()

# Build the RAG pipeline once, at import time; investigate_stream() reuses it
print("Initializing OSINT Investigation Pipeline...")
try:
    # Both settings can be overridden through the environment. They are read
    # inside the try so that an unparsable LLM_TEMPERATURE is reported through
    # the same error message as a pipeline construction failure.
    _model_name = os.environ.get("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
    _temperature = float(os.environ.get("LLM_TEMPERATURE", "0.2"))
    pipeline = create_pipeline(
        retrieval_k=5,
        model=_model_name,
        temperature=_temperature,
    )
    print("✓ Pipeline initialized successfully")
except Exception as init_error:
    # Report the failure on stdout, then let it propagate so startup aborts
    print(f"✗ Error initializing pipeline: {init_error}")
    raise

# Load SVG once at module level so the markup can be inlined into the page HTML.
# The encoding is pinned explicitly: without it, open() decodes with the
# platform's locale default, which is not UTF-8 everywhere.
# NOTE(review): assumes the asset is UTF-8 encoded (the usual case for SVG) — confirm.
with open("assets/bellingcat.svg", "r", encoding="utf-8") as f:
    BELLINGCAT_SVG = f.read()


def investigate_stream(message: str, history: list):
    """
    Stream an investigation methodology for a chat message.

    Args:
        message: User's investigation query
        history: Chat history (part of the chat callback signature; it is
            not used when generating the response)

    Yields:
        The response text accumulated so far, re-emitted after every chunk
        received from the pipeline. If an exception is raised at any point,
        an error message is yielded as the final value.
    """
    try:
        accumulated = ""
        for piece in pipeline.generate_methodology(message, stream=True):
            # Emit the whole text so far rather than only the newest piece
            accumulated = accumulated + piece
            yield accumulated
    except Exception as exc:
        yield (
            f"Error generating response: {exc!s}"
            "\n\nPlease check your environment variables "
            "(HF_TOKEN, SUPABASE_URL, SUPABASE_KEY) and try again."
        )


# Custom CSS for centered, clean appearance with dark theme.
# Passed to gr.Blocks(css=...) when the interface is built. The `.custom-chat`
# rules apply to the Chatbot component (created with
# elem_classes="custom-chat"); the `.methodology-*`, `.thanks-text` and
# `.logo-container` rules style the HTML section rendered below the chat.
# NOTE(review): the `textarea:has(textarea)` selector looks like it can never
# match (a textarea has no element children) — confirm and drop if so.
custom_css = """
.gradio-container {
    max-width: 800px !important;
    margin: 0 auto !important;
}
/* Force white text throughout */
.gradio-container,
.gradio-container * {
    color: white !important;
}
/* Chat interface styling */
.custom-chat {
    background: rgba(30, 58, 95, 0.6) !important;
}
.custom-chat * {
    background: rgba(30, 58, 95, 0.6) !important;
}
.custom-chat .message-row {
    background: rgba(30, 58, 95, 0.4) !important;
}
.custom-chat .message {
    color: white !important;
}
/* Loading spinner styling */
.custom-chat .generating,
.custom-chat .pending {
    border-color: rgba(255, 255, 255, 0.3) !important;
}
.custom-chat .generating::before,
.custom-chat .pending::before {
    background: white !important;
}
/* Loading dots */
.custom-chat .dot-flashing,
.custom-chat .dot-flashing::before,
.custom-chat .dot-flashing::after {
    background: white !important;
}
/* Input fields - target parent containers using :has() */
textarea:has(textarea),
label:has(textarea),
.block:has(textarea),
div:has(> textarea) {
    background: rgba(30, 58, 95, 0.6) !important;
}
/* Target textarea itself */
textarea, input {
    background: rgba(30, 58, 95, 0.6) !important;
    color: white !important;
    border-color: rgba(255, 255, 255, 0.2) !important;
    overflow-y: visible !important;
}
textarea::placeholder {
    color: rgba(255, 255, 255, 0.5) !important;
}
/* Buttons */
button {
    color: white !important;
}
/* Remove background from submit/stop buttons */
.stop-button, .submit-button {
    background: transparent !important;
    transition: transform 0.2s ease, opacity 0.2s ease !important;
}
.stop-button:hover, .submit-button:hover {
    transform: scale(1.1) !important;
    opacity: 0.8 !important;
}
.stop-button:active, .submit-button:active {
    transform: scale(0.95) !important;
}
.methodology-section {
    margin-top: 40px;
    padding-top: 20px;
    border-top: 1px solid rgba(255, 255, 255, 0.2);
}
.methodology-header {
    font-size: 14px;
    font-weight: 600;
    margin-bottom: 15px;
    opacity: 0.6;
    color: white !important;
}
.methodology-disclaimer {
    font-size: 13px;
    opacity: 0.5;
    margin-bottom: 15px;
    line-height: 1.6;
    color: white !important;
}
.methodology-disclaimer a {
    color: white !important;
    text-decoration: underline;
    opacity: 0.8;
}
.thanks-text {
    text-align: left;
    opacity: 0.4;
    font-size: 12px;
    margin-bottom: 10px;
    text-transform: uppercase;
    letter-spacing: 1px;
    color: white !important;
}
.logo-container {
    text-align: left;
    margin-bottom: 20px;
}
.logo-container svg,
.logo-container img {
    max-width: 150px;
    height: auto;
    opacity: 0.7;
    filter: brightness(0) invert(1);
}
"""

# Create Gradio interface: a title blurb, the streaming chat, and a footer
# crediting the data source. The theme forces the same dark background and
# input fill in both light and dark modes.
with gr.Blocks(
    title="OSINT Investigation Assistant",
    theme=gr.themes.Soft(primary_hue="slate").set(
        body_background_fill="*neutral_950",
        body_background_fill_dark="*neutral_950",
        input_background_fill="rgba(30, 58, 95, 0.6)",
        input_background_fill_dark="rgba(30, 58, 95, 0.6)"
    ),
    css=custom_css
) as demo:
    gr.Markdown("""
    # 🔍 OSINT LLM

    Get structured investigation methodologies and tool recommendations from Bellingcat's database of 344+ OSINT tools.
    """)

    # Main chat interface
    chatbot = gr.ChatInterface(
        fn=investigate_stream,
        type="messages",
        # The Chatbot's message format is set explicitly to match the
        # ChatInterface's. Left unset, the two disagree and Gradio emits a
        # warning before overriding the Chatbot's format.
        chatbot=gr.Chatbot(elem_classes="custom-chat", type="messages"),
        examples=[
            "How do I investigate a suspicious domain?",
            "What tools can I use to verify an image's authenticity?",
            "How can I trace the origin of a social media account?",
            "What's the best way to archive web content for investigation?",
            "How do I geolocate an image from social media?"
        ],
        cache_examples=False,
        api_name="investigate"
    )

    # Methodology section (below chat interface); the logo SVG markup read at
    # import time is inlined directly into the HTML
    gr.HTML(f"""
    <div class="methodology-section">
        <div class="methodology-header">Methodology</div>
        <div class="methodology-disclaimer">
            The data used by this model was sourced from:
            <a href="https://github.com/bellingcat/toolkit" target="_blank">https://github.com/bellingcat/toolkit</a>
        </div>
        <div class="thanks-text">With thanks to</div>
        <div class="logo-container">
            {BELLINGCAT_SVG}
        </div>
    </div>
    """)

# Launch configuration
if __name__ == "__main__":
    # Warn, without aborting, about any required variable that is unset or empty
    required_vars = ["SUPABASE_URL", "SUPABASE_KEY", "HF_TOKEN"]
    missing_vars = list(filter(lambda name: not os.environ.get(name), required_vars))

    if missing_vars:
        missing_names = ", ".join(missing_vars)
        print(f"⚠️  Warning: Missing environment variables: {missing_names}")
        print("Please set these in your .env file or as environment variables")

    # Serve on all interfaces at port 7860, with no public share link and the
    # API page enabled
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_api": True,
    }
    demo.launch(**launch_options)