# Hugging Face Space: shahzeb171/code-compass (status when captured: Sleeping)
# File size: 18,721 Bytes

import gradio as gr
import logging
from datetime import datetime
from pathlib import Path
from scripts.RepositoryHandler import RepositoryHandler
import os

# Expose only the CUDA device with index 7 to code in this process that
# initialises CUDA after this point.
# NOTE(review): the index is hard-coded and overwrites any value already set
# in the environment. This line also runs after the RepositoryHandler import
# above, so it only takes effect if that import does not initialise CUDA
# itself — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

# --- Setup Logging ---
def setup_logger():
    """Configure and return the shared ``code_compass`` logger.

    The logger emits INFO and above to the console and DEBUG and above to a
    timestamped file ``logs/<YYYYmmdd_HHMM>_code_compass.log`` (the ``logs``
    directory is created when missing).

    The function is idempotent: handlers live on the named logger for the
    whole process, so a repeated call returns the already-configured logger
    instead of attaching a second pair of handlers (which would duplicate
    every log record).

    Returns:
        logging.Logger: the configured ``code_compass`` logger.
    """
    logger = logging.getLogger("code_compass")
    logger.setLevel(logging.DEBUG)

    # Already configured by an earlier call; do not stack more handlers.
    if logger.handlers:
        return logger

    log_dir = Path("logs")
    log_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    log_file = log_dir / f"{timestamp}_code_compass.log"

    formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")

    # Console handler
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)

    # File handler; explicit UTF-8 because the application's messages contain
    # emoji, which a platform-default encoding may be unable to write.
    fh = logging.FileHandler(log_file, encoding="utf-8")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)

    logger.addHandler(ch)
    logger.addHandler(fh)
    return logger

setup_logger()
logger = logging.getLogger("code_compass")

# Make sure the GGUF model file is present, downloading it on first start.
logger.info("Checking for model...")
_model_path = "models/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf"
if not os.path.exists(_model_path):
    logger.info("Downloading model...")
    Path("models").mkdir(parents=True, exist_ok=True)
    # wget creates the -O target even when the transfer fails, so download to
    # a temporary name and only move it into place on success. Otherwise a
    # failed download would leave a file that passes the existence check
    # above on the next start.
    _partial_path = _model_path + ".part"
    _exit_status = os.system(
        "wget -q https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf"
        f" -O {_partial_path}"
    )
    if _exit_status == 0:
        os.replace(_partial_path, _model_path)
    else:
        # Best effort, as before: the app still starts without the model.
        logger.error("Model download failed (wget exit status %s)", _exit_status)
        if os.path.exists(_partial_path):
            os.remove(_partial_path)


# Global repository handler instance
repo_handler = RepositoryHandler()


def process_repository(input_type, github_url, zip_file):
    """Load a repository from a GitHub URL or an uploaded ZIP file.

    Any previously loaded repository is cleaned up before the new one is
    validated and loaded through the global ``repo_handler``.

    Args:
        input_type: ``"GitHub URL"`` to download from ``github_url``; any
            other value is handled as a ZIP upload.
        github_url: Repository URL text; surrounding whitespace is ignored.
        zip_file: Uploaded file value from the ZIP input, or ``None``.

    Returns:
        A 4-tuple matching the outputs wired to the load button:
        ``(status message, repository structure, process-button update,
        query-button update)``. The structure is ``""`` on failure. The two
        button entries are ``gr.update(interactive=...)`` values: a plain
        string returned for a Button output replaces the button's label, it
        does not disable it.
    """

    def failure(message):
        # Nothing usable is loaded: keep both action buttons locked.
        return message, "", gr.update(interactive=False), gr.update(interactive=False)

    # Clean up any previous repository
    repo_handler.cleanup()

    if input_type == "GitHub URL":
        url = (github_url or "").strip()
        if not url:
            return failure("❌ Please enter a GitHub repository URL")

        if not repo_handler.validate_github_url(url):
            return failure("❌ Invalid GitHub URL format. Please use: https://github.com/username/repository")

        success, message = repo_handler.download_github_repo(url)

    else:  # ZIP File
        if zip_file is None:
            return failure("❌ Please upload a ZIP file")

        is_valid, validation_msg = repo_handler.validate_zip_file(zip_file)
        if not is_valid:
            return failure(f"❌ {validation_msg}")

        success, message = repo_handler.extract_zip_file(zip_file)

    if not success:
        return failure(message)

    structure = repo_handler.get_repo_structure()
    # Loaded but not processed yet: allow processing, keep querying locked.
    return message, structure, gr.update(interactive=True), gr.update(interactive=False)

def process_chunks():
    """Process the loaded repository into chunks and store them.

    Delegates to ``repo_handler.process_and_store_chunks()`` and runs
    synchronously in the calling event handler (no background thread is
    involved).

    Returns:
        A 2-tuple matching the outputs wired to the process button:
        ``(status message, query-button update)``. The button entry is a
        ``gr.update(interactive=...)`` value that enables the query button
        only when processing succeeded; returning a plain string there would
        overwrite the button's label instead.
    """
    if not repo_handler.is_loaded:
        return "❌ No repository loaded", gr.update(interactive=False)

    try:
        success, message = repo_handler.process_and_store_chunks()
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of
        # letting the event handler crash.
        return f"❌ Error processing chunks: {str(e)}", gr.update(interactive=False)

    return message, gr.update(interactive=bool(success))

def handle_query(query):
    """Answer a question about the loaded repository.

    Returns a guidance message when no repository is loaded, when it has not
    been processed into chunks yet, or when the query is empty; otherwise
    returns the result of ``repo_handler.query_repository`` for the
    whitespace-stripped query.
    """
    if not repo_handler.is_loaded:
        return "❌ No repository loaded. Please load a repository first."

    if not repo_handler.chunks:
        return "❌ Repository not processed yet. Please click 'Process Repository' first."

    question = query.strip() if query else ""
    if not question:
        return "Please enter a query about the repository."

    return repo_handler.query_repository(question)

def get_repo_stats():
    """Build the Markdown statistics summary for the loaded repository.

    Returns a short notice when nothing is loaded or the repository has not
    been processed yet. Otherwise asks the vector store for an overview of
    ``repo_handler.repo_name`` and renders it; an overview containing an
    ``"error"`` key, or any exception raised on the way, is reported as an
    error string instead.
    """
    if not repo_handler.is_loaded:
        return "No repository loaded"

    if not (repo_handler.vector_store and repo_handler.chunks):
        return "Repository loaded but not processed yet"

    try:
        # Get repository overview from vector store
        overview = repo_handler.vector_store.get_repository_overview(repo_handler.repo_name)
        logger.debug(f"Repository overview: {overview}")

        if "error" in overview:
            return f"Error getting stats: {overview['error']}"

        summary = f"""📊 **Repository Statistics**

🏷️ **Repository:** {overview['repo_name']}
📦 **Total Chunks:** {overview['total_chunks']}
📁 **Files:** {overview['files_count']}
🏛️ **Classes:** {overview['classes_count']}  
⚙️ **Functions:** {overview['functions_count']}
💻 **Languages:** {', '.join(overview['languages'])}

📋 **Chunk Distribution:**
"""
        # One bullet line per chunk type, appended after the header.
        distribution = "".join(
            f"- {kind.title()}: {total}\n"
            for kind, total in overview['chunk_distribution'].items()
        )
        return summary + distribution
    except Exception as exc:
        return f"Error getting repository stats: {str(exc)}"
# Additional handler functions for LLM integration
def initialize_llm():
    """Start LLM initialization via the global repository handler.

    Returns whatever ``repo_handler.initialize_llm()`` returns; the interface
    displays that value in the LLM status panel.
    """
    status = repo_handler.initialize_llm()
    return status

def handle_query_with_llm(query, use_llm):
    """Answer a question about the repository, optionally through the LLM.

    Returns a guidance message when no repository is loaded, when it has not
    been processed into chunks yet, or when the query is empty; otherwise
    forwards the whitespace-stripped query and the ``use_llm`` flag to
    ``repo_handler.query_repository`` and returns its result.
    """
    if not repo_handler.is_loaded:
        return "❌ No repository loaded. Please load a repository first."

    if not repo_handler.chunks:
        return "❌ Repository not processed yet. Please click 'Process Repository' first."

    question = query.strip() if query else ""
    if not question:
        return "Please enter a query about the repository."

    return repo_handler.query_repository(question, use_llm=use_llm)

def clear_conversation():
    """Reset the LLM conversation history.

    Returns a confirmation message, or an error message when
    ``repo_handler.llm`` is not set.
    """
    llm = repo_handler.llm
    if not llm:
        return "❌ LLM not initialized"

    llm.clear_conversation()
    return "🗑️ Conversation history cleared!"

def export_conversation():
    """Render the LLM conversation history as a Markdown document.

    Returns an error message when the LLM is missing or not ready, a notice
    when there is nothing to export, and otherwise one Markdown section per
    message (role heading, timestamp, content, separator).
    """
    llm = repo_handler.llm
    if not (llm and llm.is_model_ready()):
        return "❌ LLM not ready or no conversation history"

    conversation = llm.export_conversation()
    if not conversation:
        return "No conversation to export"

    # Format for display
    role_icons = {"system": "⚙️", "user": "👤", "assistant": "🤖"}
    sections = ["# Conversation Export\n\n"]
    for entry in conversation:
        icon = role_icons.get(entry["role"], "💬")
        sections.append(
            f"## {icon} {entry['role'].title()}\n"
            f"**Time:** {entry['timestamp']}\n\n"
            f"{entry['content']}\n\n---\n\n"
        )
    return "".join(sections)

def get_llm_status():
    """Return a Markdown description of the current LLM state.

    Three states are reported:
      * loading has not been started (``repo_handler.llm_loading_started``
        is falsy);
      * loading has started but the model is not ready yet;
      * the model is ready, with its context window, temperature and
        conversation summary.
    """
    if not repo_handler.llm_loading_started:
        return "🔄 LLM not initialized"

    # Other handlers in this file treat ``repo_handler.llm`` as possibly
    # unset, so guard here too: a started-but-not-yet-assigned LLM is
    # reported as "loading" instead of raising AttributeError.
    llm = repo_handler.llm
    if not llm or not llm.is_model_ready():
        return "⏳ **LLM Loading...** Please wait for model initialization to complete."

    model_info = llm.get_model_info()
    conversation_summary = llm.get_conversation_summary()
    return f"""✅ **LLM Ready!**
        
**Model:** Qwen2.5-Coder-7B-Instruct (Q4_K_M)
**Context Window:** {model_info['context_window']} tokens
**Temperature:** {model_info['temperature']}
**Status:** {conversation_summary}

🤖 Ready for intelligent code analysis!"""

def create_interface():
    """Create the Gradio interface.

    Layout: a left column with repository input (GitHub URL or ZIP upload),
    the processing button and a status box; a right column with the
    repository structure, statistics and LLM status; below them the query
    area with chat controls, followed by static help text.

    Event wiring:
      * Load and Process each run their handler, then refresh the statistics
        and re-derive the enabled state of the process, query, clear-chat
        and export-chat buttons from the status text.
      * The query button and pressing Enter in the query box both run the
        query and then refresh the LLM status.
      * Clear Chat and Export Chat write their result into the query output.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """

    with gr.Blocks(title="Code Compass", theme=gr.themes.Soft()) as demo:

        gr.Markdown("""
        # 🔍 Code Compass
        
        Upload your repository via GitHub URL or ZIP file, process it with AI-powered chunking, and query your codebase using semantic search!
        """)

        with gr.Row():
            with gr.Column(scale=2):

                # Input section
                with gr.Group():
                    gr.Markdown("### 📥 Repository Input")

                    input_type = gr.Dropdown(
                        choices=["GitHub URL", "ZIP File"],
                        value="GitHub URL",
                        label="Input Method",
                        info="Choose how you want to provide your repository"
                    )

                    github_url = gr.Textbox(
                        label="GitHub Repository URL",
                        placeholder="https://github.com/username/repository",
                        visible=True
                    )

                    zip_file = gr.File(
                        label="Upload ZIP File",
                        file_types=[".zip"],
                        visible=False
                    )

                    load_btn = gr.Button("📁 Load Repository", variant="primary")

                # Processing section
                with gr.Group():
                    gr.Markdown("### ⚙️ Repository Processing")
                    gr.Markdown("After loading, process your repository to enable AI-powered search")

                    process_btn = gr.Button("🚀 Process Repository", interactive=False, variant="secondary")

                # Status section
                with gr.Group():
                    gr.Markdown("### 📊 Status")
                    status_output = gr.Textbox(
                        label="Status",
                        placeholder="Ready to load repository...",
                        interactive=False,
                        lines=3
                    )

            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 📁 Repository Structure")
                    structure_output = gr.Code(
                        label="Directory Structure",
                        # language="text",
                        interactive=False,
                        lines=10
                    )

                with gr.Group():
                    gr.Markdown("### 📊 Repository Stats")
                    stats_output = gr.Markdown(
                        value="Load and process a repository to see statistics"
                    )
                with gr.Group():
                    gr.Markdown("### 🤖 LLM Status")
                    llm_status = gr.Markdown(
                        value="🔄 LLM not initialized"
                    )
                    init_llm_btn = gr.Button("🚀 Initialize LLM", variant="secondary")

        # Query section
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 💬 Query Repository")
                gr.Markdown("Ask questions about your code using natural language. The AI will search through your processed code chunks to find relevant information.")

                with gr.Row():
                    query_input = gr.Textbox(
                        label="Ask about your code",
                        placeholder="e.g., 'What does this repository do?', 'Show me authentication functions', 'How is error handling implemented?'",
                        lines=2,
                        scale=4
                    )
                    query_btn = gr.Button("🔍 Ask Question", interactive=False, scale=1)
                    use_llm_toggle = gr.Checkbox(
                        label="Use AI Analysis",
                        value=True,
                        info="Get intelligent responses using LLM"
                    )
                # Conversation controls
                with gr.Row():
                    clear_chat_btn = gr.Button("🗑️ Clear Chat History", variant="secondary", interactive=False)
                    export_chat_btn = gr.Button("📥 Export Chat", variant="secondary", interactive=False)
                query_output = gr.Markdown(
                    value="Load and process a repository first to start querying...",
                    height=400
                )

        # Advanced options (collapsible) - not enabled yet; kept as a draft.
        # with gr.Accordion("🛠️ Advanced Options", open=False):
        #     with gr.Row():
        #         with gr.Column():
        #             gr.Markdown("### 🔧 Pinecone Configuration")
        #             api_key_input = gr.Textbox(
        #                 label="Pinecone API Key",
        #                 placeholder="Enter your Pinecone API key (or set PINECONE_API_KEY env var)",
        #                 type="password"
        #             )
        #             environment_input = gr.Textbox(
        #                 label="Pinecone Environment",
        #                 value="us-west1-gcp-free",
        #                 placeholder="e.g., us-west1-gcp-free"
        #             )

        #         with gr.Column():
        #             gr.Markdown("### 📈 Processing Options")
        #             complexity_threshold = gr.Slider(
        #                 minimum=5,
        #                 maximum=50,
        #                 value=20,
        #                 step=5,
        #                 label="Complexity Threshold",
        #                 info="Functions above this complexity will be sub-chunked"
        #             )

        # Event handlers
        def toggle_inputs(choice):
            # Show only the input widget that matches the selected method.
            return (
                gr.update(visible=(choice == "GitHub URL")),
                gr.update(visible=(choice == "ZIP File"))
            )

        def update_buttons_after_load(status_text):
            # Enable process button if repository is successfully loaded
            is_loaded = "✅" in status_text and "successfully" in status_text.lower()
            return gr.update(interactive=is_loaded)

        def update_query_button_after_process(status_text):
            # Enable query button if processing is successful
            is_processed = "✅" in status_text and "complete" in status_text.lower()
            return gr.update(interactive=is_processed)

        def update_buttons_after_process(status_text):
            # Enable the query and chat buttons if processing is successful
            is_processed = "✅" in status_text and "complete" in status_text.lower()
            return (
                gr.update(interactive=is_processed),  # query_btn
                gr.update(interactive=is_processed),  # clear_chat_btn
                gr.update(interactive=is_processed)   # export_chat_btn
            )

        def update_llm_status():
            return get_llm_status()

        def update_stats(status_text):
            # Refresh the stats panel and re-derive button state from status.
            return (
                get_repo_stats(),
                update_buttons_after_load(status_text),
                update_query_button_after_process(status_text),
            )

        # Wire up the interface
        input_type.change(
            fn=toggle_inputs,
            inputs=[input_type],
            outputs=[github_url, zip_file]
        )

        # The final .then() keeps the chat buttons in step with the query
        # button: locked after a fresh load, unlocked once processing is done.
        load_btn.click(
            fn=process_repository,
            inputs=[input_type, github_url, zip_file],
            outputs=[status_output, structure_output, process_btn, query_btn]
        ).then(
            fn=update_stats,
            inputs=[status_output],
            outputs=[stats_output, process_btn, query_btn]
        ).then(
            fn=update_buttons_after_process,
            inputs=[status_output],
            outputs=[query_btn, clear_chat_btn, export_chat_btn]
        )

        process_btn.click(
            fn=process_chunks,
            outputs=[status_output, query_btn]
        ).then(
            fn=update_stats,
            inputs=[status_output],
            outputs=[stats_output, process_btn, query_btn]
        ).then(
            fn=update_buttons_after_process,
            inputs=[status_output],
            outputs=[query_btn, clear_chat_btn, export_chat_btn]
        )

        # Query handling
        query_btn.click(
            fn=handle_query_with_llm,
            inputs=[query_input, use_llm_toggle],
            outputs=[query_output]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )

        # Chat management
        clear_chat_btn.click(
            fn=clear_conversation,
            outputs=[query_output]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )

        # Show the exported conversation (Markdown) in the query output.
        export_chat_btn.click(
            fn=export_conversation,
            outputs=[query_output]
        )

        # Allow Enter key to submit query; refresh LLM status like the button
        query_input.submit(
            fn=handle_query_with_llm,
            inputs=[query_input, use_llm_toggle],
            outputs=[query_output]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )

        # LLM initialization
        init_llm_btn.click(
            fn=initialize_llm,
            outputs=[llm_status]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )

        # Add some helpful examples
        gr.Markdown("""
        ### 📝 Example Repositories to Try:
        - `https://github.com/pallets/flask` - Popular Python web framework
        - `https://github.com/requests/requests` - HTTP library for Python
        - `https://github.com/fastapi/fastapi` - Modern Python web framework
        - `https://github.com/psf/black` - Python code formatter
        
        ### 💡 Example Queries:
        - "What is the main purpose of this repository?"
        - "Show me all the authentication functions"
        - "How is error handling implemented?"
        - "What are the main classes and their responsibilities?"  
        - "Find functions that handle file operations"
        - "Show me the configuration management code"
        
        ### ⚙️ Setup Requirements:
        1. **Pinecone API Key**: Get a free API key from [Pinecone.io](https://www.pinecone.io/)
        2. **Environment Variables**: Set `PINECONE_API_KEY` in your environment or enter it in Advanced Options
        3. **Internet Connection**: Required for downloading repositories and accessing Pinecone
        
        ### 🚀 How It Works:
        1. **Load**: Repository is downloaded/extracted and validated
        2. **Process**: Code is analyzed and split into hierarchical chunks (file → class → function → block)
        3. **Store**: Chunks are embedded using AI and stored in Pinecone vector database  
        4. **Query**: Your questions are semantically matched against stored code chunks
        """)

    return demo

if __name__ == "__main__":
    # Build the UI, then serve it.
    demo = create_interface()

    launch_options = {
        "server_name": "0.0.0.0",  # listen on all interfaces (external access)
        "server_port": 7860,       # standard port
        "share": False,            # True would create a public link
        "debug": True,             # debug mode for development
    }
    demo.launch(**launch_options)