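"""Code Compass: a Gradio app that loads a repository from a GitHub URL or a
ZIP upload, chunks it into a vector store, and answers natural-language
questions about the code, optionally through a local Qwen2.5-Coder LLM."""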
import os

# Pin the GPU before importing any CUDA-aware dependency (e.g. torch inside
# RepositoryHandler); CUDA_VISIBLE_DEVICES must be set before CUDA initializes.
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

import logging
import urllib.request
from datetime import datetime
from pathlib import Path

import gradio as gr
from scripts.RepositoryHandler import RepositoryHandler
# --- Setup Logging ---
def setup_logger():
    logger = logging.getLogger("code_compass")
    if logger.handlers:
        # Already configured; avoid attaching duplicate handlers
        return logger
    logger.setLevel(logging.DEBUG)

    log_dir = Path("logs")
    log_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    log_file = log_dir / f"{timestamp}_code_compass.log"

    # Console handler
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    # File handler
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.DEBUG)
    # Formatter
    formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    return logger
logger = setup_logger()
# Global repository handler instance
logger.info("Checking for model...")
if not os.path.exists("models/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf"):
logger.info("Downloading model...")
Path("models").mkdir(parents=True, exist_ok=True)
os.system("wget -q https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf -O models/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf")
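# Alternative sketch (an assumption, not part of the original app): the same
# file could be fetched with huggingface_hub, which adds caching and resumable
# downloads:
#
#   from huggingface_hub import hf_hub_download
#   hf_hub_download(
#       repo_id="bartowski/Qwen2.5-Coder-7B-Instruct-GGUF",
#       filename="Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf",
#       local_dir="models",
#   )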
# Global repository handler instance
repo_handler = RepositoryHandler()
def process_repository(input_type, github_url, zip_file):
    """Process repository based on input type."""
    # Clean up any previous repository
    repo_handler.cleanup()

    disabled = gr.update(interactive=False)
    if input_type == "GitHub URL":
        if not github_url or not github_url.strip():
            return "❌ Please enter a GitHub repository URL", "", disabled, disabled
        if not repo_handler.validate_github_url(github_url.strip()):
            return ("❌ Invalid GitHub URL format. Please use: "
                    "https://github.com/username/repository"), "", disabled, disabled
        success, message = repo_handler.download_github_repo(github_url.strip())
    else:  # ZIP File
        if zip_file is None:
            return "❌ Please upload a ZIP file", "", disabled, disabled
        is_valid, validation_msg = repo_handler.validate_zip_file(zip_file)
        if not is_valid:
            return f"❌ {validation_msg}", "", disabled, disabled
        success, message = repo_handler.extract_zip_file(zip_file)

    if success:
        structure = repo_handler.get_repo_structure()
        # Enable the process button; keep the query button disabled
        return message, structure, gr.update(interactive=True), disabled
    return message, "", disabled, disabled
def process_chunks():
    """Process repository into chunks and store in vector database."""
    if not repo_handler.is_loaded:
        return "❌ No repository loaded", gr.update(interactive=False)

    try:
        # Note: this runs synchronously inside the Gradio handler, so the UI
        # blocks until chunking and embedding finish.
        success, message = repo_handler.process_and_store_chunks()
        if success:
            # Enable query functionality
            return message, gr.update(interactive=True)
        return message, gr.update(interactive=False)
    except Exception as e:
        return f"❌ Error processing chunks: {e}", gr.update(interactive=False)
def handle_query(query):
    """Handle user queries about the repository."""
    if not repo_handler.is_loaded:
        return "❌ No repository loaded. Please load a repository first."
    if not repo_handler.chunks:
        return "❌ Repository not processed yet. Please click 'Process Repository' first."
    if not query or not query.strip():
        return "Please enter a query about the repository."
    return repo_handler.query_repository(query.strip())
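# Note: the interface below wires handle_query_with_llm; this plain variant
# appears unused by the UI and is presumably kept for search-only callers.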
def get_repo_stats():
    """Get repository statistics for display."""
    if not repo_handler.is_loaded:
        return "No repository loaded"
    if repo_handler.vector_store and repo_handler.chunks:
        try:
            # Get repository overview from vector store
            overview = repo_handler.vector_store.get_repository_overview(repo_handler.repo_name)
            logger.debug(f"Repository overview: {overview}")
            if "error" not in overview:
                stats = f"""📊 **Repository Statistics**

🏷️ **Repository:** {overview['repo_name']}
📦 **Total Chunks:** {overview['total_chunks']}
📁 **Files:** {overview['files_count']}
🏛️ **Classes:** {overview['classes_count']}
⚙️ **Functions:** {overview['functions_count']}
💻 **Languages:** {', '.join(overview['languages'])}

📋 **Chunk Distribution:**
"""
                for chunk_type, count in overview['chunk_distribution'].items():
                    stats += f"- {chunk_type.title()}: {count}\n"
                return stats
            else:
                return f"Error getting stats: {overview['error']}"
        except Exception as e:
            return f"Error getting repository stats: {e}"
    return "Repository loaded but not processed yet"
# Additional handler functions for LLM integration
def initialize_llm():
    """Start LLM model loading via the repository handler."""
    # Delegates to RepositoryHandler, which is expected to load the GGUF model
    # (get_llm_status below polls llm_loading_started to report progress).
    return repo_handler.initialize_llm()
def handle_query_with_llm(query, use_llm):
    """Handle user queries with optional LLM processing."""
    if not repo_handler.is_loaded:
        return "❌ No repository loaded. Please load a repository first."
    if not repo_handler.chunks:
        return "❌ Repository not processed yet. Please click 'Process Repository' first."
    if not query or not query.strip():
        return "Please enter a query about the repository."
    return repo_handler.query_repository(query.strip(), use_llm=use_llm)

def clear_conversation():
    """Clear LLM conversation history."""
    if repo_handler.llm:
        repo_handler.llm.clear_conversation()
        return "🗑️ Conversation history cleared!"
    return "❌ LLM not initialized"
def export_conversation():
    """Export conversation history."""
    if repo_handler.llm and repo_handler.llm.is_model_ready():
        conversation = repo_handler.llm.export_conversation()
        if conversation:
            # Format for display
            export_text = "# Conversation Export\n\n"
            for msg in conversation:
                role_emoji = {"system": "⚙️", "user": "👤", "assistant": "🤖"}.get(msg["role"], "💬")
                export_text += f"## {role_emoji} {msg['role'].title()}\n"
                export_text += f"**Time:** {msg['timestamp']}\n\n"
                export_text += f"{msg['content']}\n\n---\n\n"
            return export_text
        else:
            return "No conversation to export"
    return "❌ LLM not ready or no conversation history"
def get_llm_status():
    """Get current LLM status."""
    if not repo_handler.llm_loading_started:
        return "🔄 LLM not initialized"
    elif repo_handler.llm and repo_handler.llm.is_model_ready():
        model_info = repo_handler.llm.get_model_info()
        conversation_summary = repo_handler.llm.get_conversation_summary()
        return f"""✅ **LLM Ready!**

**Model:** Qwen2.5-Coder-7B-Instruct (Q4_K_M)
**Context Window:** {model_info['context_window']} tokens
**Temperature:** {model_info['temperature']}
**Status:** {conversation_summary}

🤖 Ready for intelligent code analysis!"""
    else:
        return "⏳ **LLM Loading...** Please wait for model initialization to complete."
def create_interface():
    """Create the Gradio interface."""
    with gr.Blocks(title="Code Compass", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔍 Code Compass

        Upload your repository via GitHub URL or ZIP file, process it with AI-powered chunking, and query your codebase using semantic search!
        """)
        with gr.Row():
            with gr.Column(scale=2):
                # Input section
                with gr.Group():
                    gr.Markdown("### 📥 Repository Input")
                    input_type = gr.Dropdown(
                        choices=["GitHub URL", "ZIP File"],
                        value="GitHub URL",
                        label="Input Method",
                        info="Choose how you want to provide your repository"
                    )
                    github_url = gr.Textbox(
                        label="GitHub Repository URL",
                        placeholder="https://github.com/username/repository",
                        visible=True
                    )
                    zip_file = gr.File(
                        label="Upload ZIP File",
                        file_types=[".zip"],
                        visible=False
                    )
                    load_btn = gr.Button("📁 Load Repository", variant="primary")

                # Processing section
                with gr.Group():
                    gr.Markdown("### ⚙️ Repository Processing")
                    gr.Markdown("After loading, process your repository to enable AI-powered search")
                    process_btn = gr.Button("🚀 Process Repository", interactive=False, variant="secondary")

                # Status section
                with gr.Group():
                    gr.Markdown("### 📊 Status")
                    status_output = gr.Textbox(
                        label="Status",
                        placeholder="Ready to load repository...",
                        interactive=False,
                        lines=3
                    )

            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 📁 Repository Structure")
                    structure_output = gr.Code(
                        label="Directory Structure",
                        interactive=False,
                        lines=10
                    )

                with gr.Group():
                    gr.Markdown("### 📊 Repository Stats")
                    stats_output = gr.Markdown(
                        value="Load and process a repository to see statistics"
                    )

                with gr.Group():
                    gr.Markdown("### 🤖 LLM Status")
                    llm_status = gr.Markdown(
                        value="🔄 LLM not initialized"
                    )
                    init_llm_btn = gr.Button("🚀 Initialize LLM", variant="secondary")
        # Query section
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 💬 Query Repository")
                gr.Markdown("Ask questions about your code using natural language. The AI will search through your processed code chunks to find relevant information.")

                with gr.Row():
                    query_input = gr.Textbox(
                        label="Ask about your code",
                        placeholder="e.g., 'What does this repository do?', 'Show me authentication functions', 'How is error handling implemented?'",
                        lines=2,
                        scale=4
                    )
                    query_btn = gr.Button("🔍 Ask Question", interactive=False, scale=1)

                use_llm_toggle = gr.Checkbox(
                    label="Use AI Analysis",
                    value=True,
                    info="Get intelligent responses using LLM"
                )

                # Conversation controls
                with gr.Row():
                    clear_chat_btn = gr.Button("🗑️ Clear Chat History", variant="secondary", interactive=False)
                    export_chat_btn = gr.Button("📥 Export Chat", variant="secondary", interactive=False)

                query_output = gr.Markdown(
                    value="Load and process a repository first to start querying...",
                    height=400
                )
        # Advanced options (collapsible), currently disabled:
        # with gr.Accordion("🛠️ Advanced Options", open=False):
        #     with gr.Row():
        #         with gr.Column():
        #             gr.Markdown("### 🔧 Pinecone Configuration")
        #             api_key_input = gr.Textbox(
        #                 label="Pinecone API Key",
        #                 placeholder="Enter your Pinecone API key (or set PINECONE_API_KEY env var)",
        #                 type="password"
        #             )
        #             environment_input = gr.Textbox(
        #                 label="Pinecone Environment",
        #                 value="us-west1-gcp-free",
        #                 placeholder="e.g., us-west1-gcp-free"
        #             )
        #         with gr.Column():
        #             gr.Markdown("### 📈 Processing Options")
        #             complexity_threshold = gr.Slider(
        #                 minimum=5,
        #                 maximum=50,
        #                 value=20,
        #                 step=5,
        #                 label="Complexity Threshold",
        #                 info="Functions above this complexity will be sub-chunked"
        #             )
        # Event handlers
        def toggle_inputs(choice):
            return (
                gr.update(visible=(choice == "GitHub URL")),
                gr.update(visible=(choice == "ZIP File"))
            )

        def update_buttons_after_load(status_text):
            # Enable the process button once a repository is successfully loaded
            is_loaded = "✅" in status_text and "successfully" in status_text.lower()
            return gr.update(interactive=is_loaded)

        def update_query_button_after_process(status_text):
            # Enable the query button once processing succeeds
            is_processed = "✅" in status_text and "complete" in status_text.lower()
            return gr.update(interactive=is_processed)

        def update_buttons_after_process(status_text):
            # Enable the query and chat-management buttons once processing succeeds
            is_processed = "✅" in status_text and "complete" in status_text.lower()
            return (
                gr.update(interactive=is_processed),  # query_btn
                gr.update(interactive=is_processed),  # clear_chat_btn
                gr.update(interactive=is_processed)   # export_chat_btn
            )

        def update_llm_status():
            return get_llm_status()

        def update_stats(status_text):
            return (
                get_repo_stats(),
                update_buttons_after_load(status_text),
                update_query_button_after_process(status_text)
            )
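        # Gradio's .then() chains a follow-up callback that runs after the
        # preceding handler finishes, so button state derived from
        # status_output is refreshed only once the status text is final.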
        # Wire up the interface
        input_type.change(
            fn=toggle_inputs,
            inputs=[input_type],
            outputs=[github_url, zip_file]
        )

        load_btn.click(
            fn=process_repository,
            inputs=[input_type, github_url, zip_file],
            outputs=[status_output, structure_output, process_btn, query_btn]
        ).then(
            fn=update_stats,
            inputs=[status_output],
            outputs=[stats_output, process_btn, query_btn]
        )
        process_btn.click(
            fn=process_chunks,
            outputs=[status_output, query_btn]
        ).then(
            fn=update_stats,
            inputs=[status_output],
            outputs=[stats_output, process_btn, query_btn]
        ).then(
            fn=update_buttons_after_process,
            inputs=[status_output],
            outputs=[query_btn, clear_chat_btn, export_chat_btn]
        )
        # Query handling
        query_btn.click(
            fn=handle_query_with_llm,
            inputs=[query_input, use_llm_toggle],
            outputs=[query_output]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )
        # Chat management
        clear_chat_btn.click(
            fn=clear_conversation,
            outputs=[query_output]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )
        export_chat_btn.click(
            fn=export_conversation,
            outputs=[query_output]
        )
        # Allow Enter key to submit query
        query_input.submit(
            fn=handle_query_with_llm,
            inputs=[query_input, use_llm_toggle],
            outputs=[query_output]
        )

        # LLM initialization
        init_llm_btn.click(
            fn=initialize_llm,
            outputs=[llm_status]
        ).then(
            fn=update_llm_status,
            outputs=[llm_status]
        )
        # Add some helpful examples
        gr.Markdown("""
        ### 📝 Example Repositories to Try:
        - `https://github.com/pallets/flask` - Popular Python web framework
        - `https://github.com/requests/requests` - HTTP library for Python
        - `https://github.com/fastapi/fastapi` - Modern Python web framework
        - `https://github.com/psf/black` - Python code formatter

        ### 💡 Example Queries:
        - "What is the main purpose of this repository?"
        - "Show me all the authentication functions"
        - "How is error handling implemented?"
        - "What are the main classes and their responsibilities?"
        - "Find functions that handle file operations"
        - "Show me the configuration management code"

        ### ⚙️ Setup Requirements:
        1. **Pinecone API Key**: Get a free API key from [Pinecone.io](https://www.pinecone.io/)
        2. **Environment Variables**: Set `PINECONE_API_KEY` in your environment
        3. **Internet Connection**: Required for downloading repositories and accessing Pinecone

        ### 🚀 How It Works:
        1. **Load**: The repository is downloaded/extracted and validated
        2. **Process**: Code is analyzed and split into hierarchical chunks (file → class → function → block)
        3. **Store**: Chunks are embedded and stored in a Pinecone vector database
        4. **Query**: Your questions are semantically matched against stored code chunks
        """)
    return demo
if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
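    # Optional (an assumption, not in the original app): enable Gradio's
    # request queue so long-running handlers such as chunking and LLM
    # inference are not cut off by request timeouts.
    # demo.queue()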
    # Launch with some sensible settings
    demo.launch(
        server_name="0.0.0.0",  # Allow external access
        server_port=7860,       # Standard Gradio port
        share=False,            # Set to True to create a public link
        debug=True              # Enable debug mode for development
    )