Spaces:

Baction
/

Vulnerability_Scanner_Client

Running

App Files Files Community

Vulnerability_Scanner_Client / app.py

Baction

Update app.py

b788ba2 verified 28 days ago

raw

history blame contribute delete

9.98 kB

	import gradio as gr
	import re
	import requests
	from markdownify import markdownify
	from requests.exceptions import RequestException
	from smolagents import (
	CodeAgent,
	ToolCallingAgent,
	InferenceClientModel,
	WebSearchTool,
	MCPClient,
	tool
	)

	# URL of the remote MCP server. analyze_vulnerabilities_multiagent connects to it
	# with MCPClient and hands the tools it exposes to the analysis agent.
	MCP_SERVER_URL = "https://baction-vulnerability-scanner-server.hf.space/gradio_api/mcp/"

	@tool
	def visit_webpage(url: str) -> str:
	    """Visits a webpage at the given URL and returns its content as a markdown string.

	    Args:
	        url: The URL of the webpage to visit.

	    Returns:
	        The content of the webpage converted to Markdown, or an error message if the request fails.
	    """
	    # NOTE(review): the @tool decorator presumably derives the tool description
	    # from the docstring above, so its wording is intentionally left unchanged.

	    # Upper bound on returned characters so a large page cannot flood the agent.
	    max_chars = 5000

	    # Browser-like User-Agent; some sites reject the default client identifier.
	    browser_headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
	    }

	    try:
	        page = requests.get(url, headers=browser_headers, timeout=30)
	        # Turn 4xx/5xx responses into exceptions handled below.
	        page.raise_for_status()

	        # HTML -> Markdown, then squeeze runs of 3+ newlines down to one blank line.
	        text = re.sub(r"\n{3,}", "\n\n", markdownify(page.text).strip())

	        if len(text) <= max_chars:
	            return text
	        return text[:max_chars] + "\n\n[Content truncated due to length...]"
	    except RequestException as exc:
	        # Network/HTTP failures are reported as text, never raised.
	        return f"Error fetching the webpage: {exc!s}"
	    except Exception as exc:
	        # Anything else (e.g. conversion problems) is also reported as text.
	        return f"An unexpected error occurred: {exc!s}"

	def parse_github_url(url):
	    """Parse a GitHub URL into ``(owner, repo, file_path)``.

	    Two URL shapes are recognised:

	    * repository URLs, ``https://github.com/<owner>/<repo>[/]`` -> ``file_path`` is ``None``
	    * file URLs, ``https://github.com/<owner>/<repo>/blob/<ref>/<path>``

	    A trailing ``#fragment`` (such as the ``#L10-L20`` line anchor GitHub adds)
	    or ``?query`` (such as ``?plain=1``) is ignored, so it never ends up in the
	    returned file path.

	    Limitation: the ``<ref>`` segment is matched as a single path component, so
	    branch names that contain ``/`` are not split correctly.

	    Args:
	        url: The URL text to parse. Surrounding whitespace is ignored.

	    Returns:
	        A 3-tuple ``(owner, repo, file_path)``. ``file_path`` is ``None`` for a
	        repository URL. All three are ``None`` when ``url`` is not a string or
	        does not match either shape.
	    """
	    # Non-string input (e.g. None) cannot be a URL; report "no match" rather than crash.
	    if not isinstance(url, str):
	        return None, None, None

	    # Cut the fragment first, then the query: neither is part of the repo/file path.
	    cleaned = url.strip().partition("#")[0].partition("?")[0]

	    # Handle repository URLs
	    repo_match = re.match(r'https://github\.com/([^/]+)/([^/]+)/?$', cleaned)
	    if repo_match:
	        return repo_match.group(1), repo_match.group(2), None

	    # Handle file URLs
	    file_match = re.match(r'https://github\.com/([^/]+)/([^/]+)/blob/[^/]+/(.+)$', cleaned)
	    if file_match:
	        return file_match.group(1), file_match.group(2), file_match.group(3)

	    return None, None, None

	def analyze_vulnerabilities_multiagent(message, history, hf_token):
	    """Run the agent-based vulnerability analysis for one GitHub file URL.

	    The function validates the token, parses ``message`` as a GitHub file URL,
	    connects to the MCP server at ``MCP_SERVER_URL`` to obtain its tools, adds
	    the local ``visit_webpage`` tool, and asks a single ``CodeAgent`` to follow
	    the analysis prompt built below.

	    Args:
	        message: The chat message; expected to be a GitHub *file* URL.
	        history: Chat history supplied by the chat interface. Not used here.
	        hf_token: Hugging Face API token used to create the inference model.

	    Returns:
	        The agent's result converted to ``str``, or a user-facing message
	        starting with "❌" when the token is missing, the URL is not a GitHub
	        file URL, or any step of the analysis raises.
	    """
	    # Validate HF token input (None is treated like an empty token).
	    if not hf_token or not hf_token.strip():
	        return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"

	    # Set only once the connection exists, so the finally clause knows whether to disconnect.
	    mcp_client = None

	    try:
	        # Parse the GitHub URL
	        owner, repo, file_path = parse_github_url(message)

	        if not owner or not repo:
	            return "❌ Invalid GitHub URL. Please provide a valid GitHub repository or file URL."

	        if not file_path:
	            return "❌ Please provide a specific file URL for analysis. Repository-wide analysis is not supported in multi-agent mode."

	        # Connect to MCP server for GitHub tools
	        mcp_client = MCPClient({
	            "url": MCP_SERVER_URL,
	            "timeout": 120
	        })
	        github_tools = mcp_client.get_tools()

	        # Initialize AI model
	        model = InferenceClientModel(token=hf_token.strip())

	        # Single agent with all tools instead of multi-agent, to avoid tool_choice issues.
	        agent = CodeAgent(
	            tools=github_tools + [visit_webpage],
	            model=model,
	            additional_authorized_imports=["re", "requests"],
	            max_steps=12
	        )

	        # Prompt for the single-agent analysis.
	        # NOTE: this is an f-string. Literal braces and backslashes in the regex
	        # example must be escaped ({{4}}, \\d), otherwise "{4}" is evaluated as an
	        # expression and the agent is shown the non-matching pattern "\d4".
	        enhanced_prompt = f"""
	You are a cybersecurity expert. Analyze this GitHub file for security vulnerabilities.

	GitHub URL: {message}
	Repository: {owner}/{repo}
	File Path: {file_path}

	ANALYSIS STEPS:

	1. Get File Data:
	- Use get_repository_info with owner="{owner}", repo="{repo}"
	- Use get_file_content with owner="{owner}", repo="{repo}", path="{file_path}"

	2. Find Vulnerabilities:
	Analyze code for:
	- SQL injection patterns
	- Command injection (eval, exec, os.system)
	- XSS vulnerabilities
	- Path traversal
	- Hardcoded secrets
	- Input validation issues

	3. CVE Research:
	- Search for CVEs: simple_cve_search("SQL injection", 3)
	- Extract CVE IDs from the string result using regex
	- Visit NVD for the first CVE: visit_webpage("https://nvd.nist.gov/vuln/detail/CVE-XXXX-XXXX")
	- Include the full NVD webpage content in your report

	4. Generate Report:

	# 🛡️ Security Analysis Report

	## 🔍 File Overview
	- Path: {file_path}
	- Repository: {owner}/{repo}

	## 🚨 Vulnerabilities Found
	[List vulnerabilities with line numbers]

	## 📊 CVE Research
	Top Related CVE: [First CVE ID from regex extraction]
	CVE Details Webpage Content: [Complete content from visit_webpage call]
	Key Details from CVE Details: [CVSS score, attack vector, impact extracted from webpage]

	## ⚠️ Other Possible CVEs
	[Show other 2 CVE IDs from search]

	## 🛠️ Remediation
	[Specific fixes]

	## ⚠️ Disclaimer
	AI analysis may not be 100% accurate. Manual security review recommended.

	REMEMBER: Always call visit_webpage for the first CVE ID to get detailed CVE information!

	CRITICAL INSTRUCTIONS:
	- simple_cve_search returns a STRING with CVE IDs and descriptions
	- Extract CVE IDs using: re.findall(r'CVE-\\d{{4}}-\\d+', cve_search_string)
	- TRY to visit CVE Details webpage for the first CVE ID found (more reliable than NVD)
	- Use this exact pattern:
	1. Call simple_cve_search("SQL injection", 3)
	2. Extract CVE IDs with regex from the returned string
	3. Take the first CVE ID from the list
	4. Call visit_webpage("https://www.cvedetails.com/cve/CVE-YYYY-NNNNN/") with the EXACT CVE ID (keep hyphens)
	5. If webpage fails (403 error), continue with analysis using CVE search results only
	- Keep variable names simple and avoid complex operations
	- ALWAYS use keyword arguments for MCP tools (e.g., owner="user", repo="repo", path="file.py")
	- NOTE: CVE format is standard CVE-YYYY-NNNNN (like CVE-2024-54762)
	- Example: If you get "CVE-2024-54762", visit "https://www.cvedetails.com/cve/CVE-2024-54762/"
	- DO NOT remove hyphens from CVE IDs when visiting CVE Details URLs
	- If CVE Details access fails, use the CVE descriptions from simple_cve_search results
	"""

	        # Run the agent analysis
	        result = agent.run(enhanced_prompt)

	        return str(result)

	    except Exception as e:
	        # UI boundary: report every failure as chat text instead of raising.
	        return f"❌ Error in multi-agent analysis: {str(e)}\n\nPlease ensure:\n• Valid GitHub file URL (not repository URL)\n• Hugging Face token is correct\n• File is accessible"

	    finally:
	        # Always release the MCP connection, including when tool loading or the
	        # agent run failed (previously it was only closed on success).
	        if mcp_client is not None:
	            try:
	                mcp_client.disconnect()
	            except Exception:
	                # Best-effort teardown: a failing disconnect must not replace
	                # the report or error message already being returned.
	                pass

	# Gradio UI: a Blocks page (Soft theme, blue primary hue) made of introductory
	# Markdown, a Hugging Face token field, and a chat interface that runs the analysis.
	with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
	gr.Markdown("## 🛡️ Enhanced GitHub Vulnerability Scanner")
	# NOTE(review): the intro text below mentions NVD pages and links an MCP server
	# Space that differs from MCP_SERVER_URL — confirm the copy is still current.
	gr.Markdown("""
	Advanced Security Analysis with Web Scraping

	This intelligent vulnerability scanner uses AI agents with web scraping capabilities to perform comprehensive security analysis of GitHub files.

	Key Features:
	- 🤖 AI Agent System: Single agent with multiple tools for efficient analysis
	- 🌐 Web Scraping: Automatically visits NVD webpages to get detailed CVE information
	- 📊 CVE Database Integration: Searches CVE knowledge base and gets top 3 matches
	- 🔍 Smart Analysis: AI-generated vulnerability descriptions (not hardcoded)
	- 📋 Detailed Reports: Comprehensive reports with NVD data and remediation advice
	- ⚠️ Accuracy Disclaimer: Shows alternative CVEs and warns about AI limitations

	Project Links:
	- 📂 Source Code: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
	- 🔧 MCP Server: [Hugging Face Space](https://huggingface.co/spaces/HimanshuGoyal2004/github-mcp-server)

	⚠️ Important Notice: This tool is designed for legitimate security research and vulnerability assessment purposes only. Do not use this scanner for malicious activities, unauthorized access, or any illegal purposes. Always ensure you have proper authorization before scanning repositories that don't belong to you.
	""")
	gr.Markdown("---")

	# API Configuration Section
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown("### 🔑 API Configuration")
	# Password-type textbox; its value is passed to the chat handler through
	# additional_inputs on the ChatInterface below.
	hf_token_box = gr.Textbox(
	label="🤗 Hugging Face API Key",
	placeholder="Enter your Hugging Face API key for AI model access",
	type="password",
	info="🔗 Get your free key: https://huggingface.co/settings/tokens"
	)

	gr.Markdown("---")
	gr.Markdown("### 💬 Enhanced Security Analysis")
	gr.Markdown("Paste a GitHub FILE URL (not repository URL) below to start the enhanced security analysis.")

	# Chatbot Interface
	# The handler is called with (message, history, token) and forwards them
	# unchanged to analyze_vulnerabilities_multiagent, which returns the reply text.
	# Each example row is [message, token]; the token column is left empty.
	chatbot = gr.ChatInterface(
	fn=lambda msg, hist, hf_token: analyze_vulnerabilities_multiagent(msg, hist, hf_token),
	additional_inputs=[hf_token_box],
	type="messages",
	examples=[
	["https://github.com/ayushmittal62/vunreability_scanner_testing/blob/master/database/schema.sql", ""],
	["https://github.com/ayushmittal62/vunreability_scanner_testing/blob/master/python/database.py", ""],
	["https://github.com/banno-0720/documentation-agent/blob/main/code.py", ""]
	],
	)

	# Start the app only when this file is executed directly, not when imported.
	if __name__ == "__main__":
	demo.launch(server_port=7860) # Serve on port 7860. NOTE(review): the earlier "different port to avoid conflict with server" remark looks stale, since 7860 appears to be Gradio's documented default — confirm.