Spaces:

tomvaillant
/

osint-llm

Running

Tom Claude committed on Nov 4

Commit

8c1e2c8

1 Parent(s): 42e8b59

Polish UI and optimize for production deployment

UI improvements:
- Dark theme with centered layout (800px max-width)
- Custom chat styling with dark blue input wrapper
- Animated submit button with hover effects
- Loading spinner with white dots for visibility
- Bellingcat logo attribution with methodology section

Code optimizations:
- Removed unused functions (investigate, get_tool_recommendations)
- Fixed temperature default (0.2 → 0.3) for better explanations
- Increased max_tokens (600 → 800) for detailed instructions
- Optimized SVG loading (module-level constant)
- Updated env var checks (SUPABASE_URL, SUPABASE_KEY, HF_TOKEN)

Prompt improvements:
- Enhanced to explain HOW to use tools, not just list them
- Added "How to use" and "What you'll find" for each step
- Better follow-up question handling
- Increased word limit (300 → 400) for actionable guidance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (4) hide show

app.py +142 -163
assets/bellingcat.svg +10 -0
src/llm_client.py +2 -2
src/prompts.py +29 -21

app.py CHANGED Viewed

@@ -19,31 +19,16 @@ try:
     pipeline = create_pipeline(
         retrieval_k=5,
         model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
-        temperature=float(os.getenv("LLM_TEMPERATURE", "0.7"))
     )
     print("✓ Pipeline initialized successfully")
 except Exception as e:
     print(f"✗ Error initializing pipeline: {e}")
     raise
-def investigate(message: str, history: list) -> str:
-    """
-    Main chat function for investigation queries
-    Args:
-        message: User's investigation query
-        history: Chat history (list of [user_msg, bot_msg] pairs)
-    Returns:
-        Generated investigation methodology
-    """
-    try:
-        # Generate response (non-streaming for simplicity)
-        response = pipeline.generate_methodology(message, stream=False)
-        return response
-    except Exception as e:
-        return f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again."
 def investigate_stream(message: str, history: list):
@@ -64,76 +49,148 @@ def investigate_stream(message: str, history: list):
             full_response += chunk
             yield full_response
     except Exception as e:
-        yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again."
-def get_tool_recommendations(query: str, k: int = 5) -> str:
-    """
-    Get tool recommendations for a query
-    Args:
-        query: Investigation query
-        k: Number of tools to recommend
-    Returns:
-        Formatted tool recommendations
-    """
-    try:
-        tools = pipeline.get_tool_recommendations(query, k=k)
-        if not tools:
-            return "No relevant tools found."
-        output = f"## Top {len(tools)} Recommended Tools\n\n"
-        for i, tool in enumerate(tools, 1):
-            output += f"### {i}. {tool['name']}\n"
-            output += f"- **Category**: {tool['category']}\n"
-            output += f"- **Cost**: {tool['cost']}\n"
-            output += f"- **URL**: {tool['url']}\n"
-            output += f"- **Description**: {tool['description']}\n"
-            if tool['details'] and tool['details'] != 'N/A':
-                output += f"- **Details**: {tool['details']}\n"
-            output += "\n"
-        return output
-    except Exception as e:
-        return f"Error retrieving tools: {str(e)}"
-# Custom CSS for better appearance
 custom_css = """
 .gradio-container {
-    max-width: 900px !important;
 }
-#component-0 {
-    max-width: 900px;
 }
 """
 # Create Gradio interface
 with gr.Blocks(
     title="OSINT Investigation Assistant",
-    theme=gr.themes.Soft(),
     css=custom_css
 ) as demo:
     gr.Markdown("""
-    # 🔍 OSINT Investigation Assistant
-    Ask me how to investigate anything using open-source intelligence methods.
-    I'll provide you with a structured methodology and recommend specific OSINT tools
-    from a database of 344+ tools.
-    **Examples:**
-    - "How do I investigate a suspicious domain?"
-    - "What tools can I use to verify an image's authenticity?"
-    - "How can I trace the origin of a social media account?"
     """)
     # Main chat interface
     chatbot = gr.ChatInterface(
         fn=investigate_stream,
         type="messages",
         examples=[
             "How do I investigate a suspicious domain?",
             "What tools can I use to verify an image's authenticity?",
@@ -142,105 +199,28 @@ with gr.Blocks(
             "How do I geolocate an image from social media?"
         ],
         cache_examples=False,
-        title="Chat Interface",
-        description="Ask your investigation questions here",
-        api_name="investigate"  # This creates the /call/investigate API endpoint
     )
-    # Additional tab for direct tool search
-    with gr.Tab("Tool Search"):
-        gr.Markdown("### Search for OSINT Tools")
-        with gr.Row():
-            tool_query = gr.Textbox(
-                label="Search Query",
-                placeholder="e.g., social media analysis, image verification, domain investigation",
-                lines=2
-            )
-            tool_count = gr.Slider(
-                minimum=1,
-                maximum=20,
-                value=5,
-                step=1,
-                label="Number of Tools"
-            )
-        tool_search_btn = gr.Button("Search Tools", variant="primary")
-        tool_output = gr.Markdown(label="Recommended Tools")
-        tool_search_btn.click(
-            fn=get_tool_recommendations,
-            inputs=[tool_query, tool_count],
-            outputs=tool_output,
-            api_name="search_tools"  # This creates the /call/search_tools API endpoint
-        )
-    # Information tab
-    with gr.Tab("About"):
-        gr.Markdown("""
-        ## About This Assistant
-        This OSINT Investigation Assistant helps researchers and investigators develop
-        structured methodologies for open-source intelligence investigations.
-        ### Features
-        - 🎯 **Structured Methodologies**: Get step-by-step investigation plans
-        - 🛠️ **Tool Recommendations**: Access a database of 344+ OSINT tools
-        - 🔍 **Context-Aware**: Tools are recommended based on your specific needs
-        - 🚀 **API Access**: Use this app via API for integration with other tools
-        ### Technology Stack
-        - **Vector Database**: Supabase with PGVector (344 OSINT tools)
-        - **LLM**: Hugging Face Inference Providers (Llama 3.1)
-        - **RAG Framework**: LangChain for retrieval-augmented generation
-        - **UI/API**: Gradio with automatic API generation
-        ### API Usage
-        This app automatically exposes API endpoints. You can access them using:
-        **Python Client:**
-        ```python
-        from gradio_client import Client
-        client = Client("your-space-url")
-        result = client.predict("How do I investigate a domain?", api_name="/investigate")
-        print(result)
-        ```
-        **cURL:**
-        ```bash
-        curl -X POST "https://your-space.hf.space/call/investigate" \\
-             -H "Content-Type: application/json" \\
-             -d '{"data": ["How do I investigate a domain?"]}'
-        ```
-        View the full API documentation at the bottom of this page (click "Use via API").
-        ### Environment Variables Required
-        - `SUPABASE_CONNECTION_STRING`: PostgreSQL connection string for Supabase
-        - `HF_TOKEN`: Hugging Face API token for Inference Providers
-        - `LLM_MODEL` (optional): Model to use (default: meta-llama/Llama-3.1-8B-Instruct)
-        - `LLM_TEMPERATURE` (optional): Temperature for generation (default: 0.7)
-        ### Data Source
-        The tool recommendations are based on the Bellingcat OSINT Toolkit and other
-        curated sources, with 344+ tools across categories including:
-        - Social Media Investigation
-        - Image and Video Analysis
-        - Domain and Network Investigation
-        - Geolocation
-        - Archiving and Preservation
-        - And more...
-        ---
-        Built with ❤️ for the OSINT community
-        """)
 # Launch configuration
 if __name__ == "__main__":
     # Check for required environment variables
-    required_vars = ["SUPABASE_CONNECTION_STRING", "HF_TOKEN"]
     missing_vars = [var for var in required_vars if not os.getenv(var)]
     if missing_vars:
@@ -248,10 +228,9 @@ if __name__ == "__main__":
         print("Please set these in your .env file or as environment variables")
     # Launch the app
-    # Set mcp_server=True to enable MCP protocol for agent integration
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False,
-        show_api=True  # Show API documentation
     )

     pipeline = create_pipeline(
         retrieval_k=5,
         model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
+        temperature=float(os.getenv("LLM_TEMPERATURE", "0.2"))
     )
     print("✓ Pipeline initialized successfully")
 except Exception as e:
     print(f"✗ Error initializing pipeline: {e}")
     raise
+# Load SVG once at module level
+with open("assets/bellingcat.svg", "r") as f:
+    BELLINGCAT_SVG = f.read()
 def investigate_stream(message: str, history: list):
             full_response += chunk
             yield full_response
     except Exception as e:
+        yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_URL, SUPABASE_KEY) and try again."
+# Custom CSS for centered, clean appearance with dark theme
 custom_css = """
 .gradio-container {
+    max-width: 800px !important;
+    margin: 0 auto !important;
+}
+/* Force white text throughout */
+.gradio-container,
+.gradio-container * {
+    color: white !important;
+}
+/* Chat interface styling */
+.custom-chat {
+    background: rgba(30, 58, 95, 0.6) !important;
+}
+.custom-chat * {
+    background: rgba(30, 58, 95, 0.6) !important;
+}
+.custom-chat .message-row {
+    background: rgba(30, 58, 95, 0.4) !important;
+}
+.custom-chat .message {
+    color: white !important;
+}
+/* Loading spinner styling */
+.custom-chat .generating,
+.custom-chat .pending {
+    border-color: rgba(255, 255, 255, 0.3) !important;
+}
+.custom-chat .generating::before,
+.custom-chat .pending::before {
+    background: white !important;
+}
+/* Loading dots */
+.custom-chat .dot-flashing,
+.custom-chat .dot-flashing::before,
+.custom-chat .dot-flashing::after {
+    background: white !important;
+}
+/* Input fields - target parent containers using :has() */
+textarea:has(textarea),
+label:has(textarea),
+.block:has(textarea),
+div:has(> textarea) {
+    background: rgba(30, 58, 95, 0.6) !important;
 }
+/* Target textarea itself */
+textarea, input {
+    background: rgba(30, 58, 95, 0.6) !important;
+    color: white !important;
+    border-color: rgba(255, 255, 255, 0.2) !important;
+}
+textarea::placeholder {
+    color: rgba(255, 255, 255, 0.5) !important;
+}
+/* Buttons */
+button {
+    color: white !important;
+}
+/* Remove background from submit/stop buttons */
+.stop-button, .submit-button {
+    background: transparent !important;
+    transition: transform 0.2s ease, opacity 0.2s ease !important;
+}
+.stop-button:hover, .submit-button:hover {
+    transform: scale(1.1) !important;
+    opacity: 0.8 !important;
+}
+.stop-button:active, .submit-button:active {
+    transform: scale(0.95) !important;
+}
+.methodology-section {
+    margin-top: 40px;
+    padding-top: 20px;
+    border-top: 1px solid rgba(255, 255, 255, 0.2);
+}
+.methodology-header {
+    font-size: 14px;
+    font-weight: 600;
+    margin-bottom: 15px;
+    opacity: 0.6;
+    color: white !important;
+}
+.methodology-disclaimer {
+    font-size: 13px;
+    opacity: 0.5;
+    margin-bottom: 15px;
+    line-height: 1.6;
+    color: white !important;
+}
+.methodology-disclaimer a {
+    color: white !important;
+    text-decoration: underline;
+    opacity: 0.8;
+}
+.thanks-text {
+    text-align: left;
+    opacity: 0.4;
+    font-size: 12px;
+    margin-bottom: 10px;
+    text-transform: uppercase;
+    letter-spacing: 1px;
+    color: white !important;
+}
+.logo-container {
+    text-align: left;
+    margin-bottom: 20px;
+}
+.logo-container svg,
+.logo-container img {
+    max-width: 150px;
+    height: auto;
+    opacity: 0.7;
+    filter: brightness(0) invert(1);
 }
 """
 # Create Gradio interface
 with gr.Blocks(
     title="OSINT Investigation Assistant",
+    theme=gr.themes.Soft(primary_hue="slate").set(
+        body_background_fill="*neutral_950",
+        body_background_fill_dark="*neutral_950",
+        input_background_fill="rgba(30, 58, 95, 0.6)",
+        input_background_fill_dark="rgba(30, 58, 95, 0.6)"
+    ),
     css=custom_css
 ) as demo:
     gr.Markdown("""
+    # 🔍 OSINT LLM
+    Get structured investigation methodologies and tool recommendations from Bellingcat's database of 344+ OSINT tools.
     """)
     # Main chat interface
     chatbot = gr.ChatInterface(
         fn=investigate_stream,
         type="messages",
+        chatbot=gr.Chatbot(elem_classes="custom-chat"),
         examples=[
             "How do I investigate a suspicious domain?",
             "What tools can I use to verify an image's authenticity?",
             "How do I geolocate an image from social media?"
         ],
         cache_examples=False,
+        api_name="investigate"
     )
+    # Methodology section (below chat interface)
+    gr.HTML(f"""
+    <div class="methodology-section">
+        <div class="methodology-header">Methodology</div>
+        <div class="methodology-disclaimer">
+            The data used by this model was sourced from:
+            <a href="https://github.com/bellingcat/toolkit" target="_blank">https://github.com/bellingcat/toolkit</a>
+        </div>
+        <div class="thanks-text">With thanks to</div>
+        <div class="logo-container">
+            {BELLINGCAT_SVG}
+        </div>
+    </div>
+    """)
 # Launch configuration
 if __name__ == "__main__":
     # Check for required environment variables
+    required_vars = ["SUPABASE_URL", "SUPABASE_KEY", "HF_TOKEN"]
     missing_vars = [var for var in required_vars if not os.getenv(var)]
     if missing_vars:
         print("Please set these in your .env file or as environment variables")
     # Launch the app
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False,
+        show_api=True
     )

assets/bellingcat.svg ADDED Viewed

src/llm_client.py CHANGED Viewed

@@ -12,8 +12,8 @@ class InferenceProviderClient:
         self,
         model: str = "meta-llama/Llama-3.1-8B-Instruct",
         api_key: Optional[str] = None,
-        temperature: float = 0.2,
-        max_tokens: int = 600
     ):
         """
         Initialize the Inference client

         self,
         model: str = "meta-llama/Llama-3.1-8B-Instruct",
         api_key: Optional[str] = None,
+        temperature: float = 0.3,
+        max_tokens: int = 800
     ):
         """
         Initialize the Inference client

src/prompts.py CHANGED Viewed

@@ -1,23 +1,27 @@
 """Prompt templates for OSINT investigation assistant"""
-SYSTEM_PROMPT = """You are an OSINT investigation assistant. Your responses must be SHORT and FOCUSED.
-STRICT RULES:
-1. ONLY recommend tools from the provided database - DO NOT suggest tools not in the list
-2. Keep your response under 300 words
-3. List 3-5 steps maximum
-4. Include tool names and URLs from the database
-5. NO lengthy explanations
-6. NO additional tools beyond what's provided
 Format:
 **Investigation Steps:**
-1. [Step] - Use [Tool Name] ([URL])
-2. [Step] - Use [Tool Name] ([URL])
-3. [Step] - Use [Tool Name] ([URL])
-**Why these tools:** [1-2 sentences max]"""
 INVESTIGATION_PROMPT_TEMPLATE = """USER QUESTION: {query}
@@ -26,20 +30,24 @@ AVAILABLE TOOLS FROM DATABASE:
 {context}
 INSTRUCTIONS:
-- Provide 3-5 investigation steps ONLY
 - Use ONLY tools from the list above
-- Include tool name + URL for each step
-- Keep response under 300 words
-- Be specific and direct
-- NO lengthy explanations
 Respond with:
-**Steps:**
 1. [Action] using [Tool Name] ([URL])
-2. [Action] using [Tool Name] ([URL])
-3. [Action] using [Tool Name] ([URL])
-**Notes:** [1-2 sentences explaining why these specific tools]"""
 FOLLOWUP_PROMPT_TEMPLATE = """You are an expert OSINT investigation assistant continuing a conversation.

 """Prompt templates for OSINT investigation assistant"""
+SYSTEM_PROMPT = """You are an OSINT investigation assistant. Provide practical, actionable guidance.
+RULES:
+1. ONLY recommend tools from the provided database
+2. Explain HOW to use each tool, not just what it does
+3. Provide step-by-step methodology in logical order
+4. Keep response under 400 words
+5. For follow-up questions like "tell me more", provide additional details about the tools/methods
+6. Be specific about inputs, outputs, and what to look for
 Format:
 **Investigation Steps:**
+1. [Action] using [Tool Name] ([URL])
+   - How: [Brief instructions on using the tool]
+   - What to look for: [Expected results/outputs]
+2. [Next action] using [Tool Name] ([URL])
+   - How: [Brief instructions]
+   - What to look for: [Expected results]
+**Key Points:** [Important considerations or tips]"""
 INVESTIGATION_PROMPT_TEMPLATE = """USER QUESTION: {query}
 {context}
 INSTRUCTIONS:
+- Provide 3-5 investigation steps in logical order
+- For EACH step, explain HOW to use the tool (what to input, what to look for)
 - Use ONLY tools from the list above
+- Include practical tips and expected outcomes
+- Keep response under 400 words total
+- If user asks "tell me more" or follow-up questions, provide additional details from the tool descriptions
 Respond with:
+**Investigation Steps:**
 1. [Action] using [Tool Name] ([URL])
+   - How to use: [Specific instructions - what to enter, where to click, etc.]
+   - What you'll find: [Expected results and what they mean]
+2. [Next action] using [Tool Name] ([URL])
+   - How to use: [Instructions]
+   - What you'll find: [Results]
+**Important Notes:** [Key considerations, tips, or warnings]"""
 FOLLOWUP_PROMPT_TEMPLATE = """You are an expert OSINT investigation assistant continuing a conversation.