Spaces:

Perfect7613
/

Factchecker

Sleeping

App Files Files Community

Factchecker / app.py

Perfect7613

Done

e268dcd verified about 1 year ago

raw

history blame contribute delete

10.1 kB

	import gradio as gr
	import google.generativeai as genai
	from datetime import datetime
	from dataclasses import dataclass
	from typing import List, Dict, Optional, Tuple
	import requests
	import json
	import os
	from dotenv import load_dotenv

	@dataclass
	class Source:
	    """A web page consulted while fact-checking a claim.

	    Attributes:
	        url: Address of the page.
	        title: Title of the page.
	        content: Text of the page that is handed to the model.
	        reputation_score: Weight applied to evidence drawn from this page.
	    """

	    url: str
	    title: str
	    content: str
	    reputation_score: float

	@dataclass
	class FactCheckResult:
	    """Outcome of fact-checking a single claim.

	    Attributes:
	        claim: The statement that was checked.
	        verdict: Human-readable verdict label.
	        confidence_score: Confidence attached to the verdict.
	        analysis_date: Timestamp of the analysis, stored as a string.
	        sources: Pages that were analyzed.
	        evidence: Evidence entries that support the claim.
	        contradictions: Evidence entries that contradict the claim.
	        explanation: Short justification of the verdict.
	    """

	    claim: str
	    verdict: str
	    confidence_score: float
	    analysis_date: str
	    sources: List[Source]
	    evidence: List[Dict]
	    contradictions: List[Dict]
	    explanation: str

	class GeminiFactChecker:
	    """Fact-check a claim using web search, a page reader and Gemini.

	    The pipeline is: search the web for the claim, fetch each hit through the
	    Jina Reader endpoint, ask Gemini to extract supporting / contradicting
	    evidence from every page, then weigh that evidence into a verdict.
	    """

	    SEARCH_URL = "https://www.googleapis.com/customsearch/v1"
	    REQUEST_TIMEOUT = 10  # seconds, applied to every outbound HTTP request
	    MAX_SEARCH_RESULTS = 10  # the Custom Search JSON API accepts num in 1..10
	    DEFAULT_REPUTATION_SCORE = 0.8  # placeholder score given to every source

	    def __init__(self):
	        """Configure the Gemini model and read credentials from the environment.

	        Raises:
	            ValueError: If ``GOOGLE_API_KEY`` is not set.
	        """
	        api_key = os.getenv("GOOGLE_API_KEY")
	        if not api_key:
	            raise ValueError("GOOGLE_API_KEY environment variable is required")

	        genai.configure(api_key=api_key)
	        generation_config = genai.types.GenerationConfig(
	            temperature=0.1,
	            top_p=0.8,
	            top_k=40,
	        )

	        self.model = genai.GenerativeModel(
	            model_name='gemini-1.5-pro',
	            generation_config=generation_config
	        )
	        self.search_api_key = os.getenv("SEARCH_API_KEY")
	        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
	        self.jinai_api_key = os.getenv("JINA_AI_API_KEY")
	        self.jinai_reader_url = "https://r.jina.ai/"

	    @staticmethod
	    def _as_unit_float(value, default: float) -> float:
	        """Coerce *value* to a float clamped to [0.0, 1.0], else return *default*."""
	        try:
	            number = float(value)
	        except (TypeError, ValueError):
	            return default
	        if number != number:  # NaN never compares equal to itself
	            return default
	        return max(0.0, min(1.0, number))

	    @staticmethod
	    def _evidence_kind(item: Dict) -> str:
	        """Return the normalized (stripped, lower-cased) ``type`` of an evidence dict."""
	        return str(item.get("type", "")).strip().lower()

	    @staticmethod
	    def _parse_evidence_json(raw_text: str) -> List[Dict]:
	        """Extract a list of evidence dicts from a model reply.

	        Handles replies wrapped in a Markdown code fence (with or without a
	        closing fence), replies with prose around the JSON array, and a single
	        JSON object instead of an array. Non-dict entries are discarded.

	        Raises:
	            ValueError: If no JSON can be decoded from the reply.
	        """
	        text = (raw_text or "").strip()
	        if text.startswith("```"):
	            text = text[3:]
	            if text[:4].lower() == "json":
	                text = text[4:]
	            text = text.strip()
	            # Only strip a closing fence that is really there; slicing
	            # blindly would cut off the end of the JSON payload.
	            if text.endswith("```"):
	                text = text[:-3]
	        text = text.strip()

	        try:
	            parsed = json.loads(text)
	        except ValueError:
	            # Fall back to the outermost [...] span when the model added prose.
	            start, end = text.find("["), text.rfind("]")
	            if start == -1 or end <= start:
	                raise
	            parsed = json.loads(text[start:end + 1])

	        if isinstance(parsed, dict):
	            parsed = [parsed]
	        if not isinstance(parsed, list):
	            return []
	        return [item for item in parsed if isinstance(item, dict)]

	    @classmethod
	    def _derive_verdict(cls, evidence: List[Dict], supporting: List[Dict],
	                        contradicting: List[Dict]) -> Tuple[str, float, str]:
	        """Turn weighted evidence into ``(verdict, confidence, explanation)``.

	        Each entry weighs ``relevance * source_score``; missing or non-numeric
	        values fall back to 0.5 and 1.0 respectively.
	        """
	        if not evidence:
	            return ("Insufficient evidence", 0.0,
	                    "No evidence found from analyzed sources.")

	        def weight(item: Dict) -> float:
	            relevance = cls._as_unit_float(item.get("relevance"), 0.5)
	            source_score = cls._as_unit_float(item.get("source_score"), 1.0)
	            return relevance * source_score

	        total_support = sum(weight(item) for item in supporting)
	        total_contradiction = sum(weight(item) for item in contradicting)
	        total = total_support + total_contradiction

	        if total <= 0:
	            # Without any weighted evidence a ratio of 0 would otherwise be
	            # reported as "Likely False" with full confidence.
	            return ("Insufficient evidence", 0.0,
	                    "Evidence was found, but none of it clearly supports or "
	                    "contradicts the claim.")

	        support_ratio = total_support / total
	        confidence = max(support_ratio, 1 - support_ratio)

	        if support_ratio > 0.6:
	            verdict = "Likely True" if confidence >= 0.7 else "Somewhat True"
	        elif support_ratio < 0.4:
	            verdict = "Likely False" if confidence >= 0.7 else "Somewhat False"
	        else:
	            verdict = "Inconclusive"

	        explanation = (
	            f"Based on {len(supporting)} supporting and "
	            f"{len(contradicting)} contradicting pieces of evidence."
	        )
	        return verdict, confidence, explanation

	    def _search_sources(self, claim: str, num_sources: int = 3) -> List[str]:
	        """Return result URLs for *claim* from the Custom Search API.

	        Args:
	            claim: Text used as the search query.
	            num_sources: Number of results to request (clamped to 1..10).

	        Returns:
	            A list of result links; empty when credentials are missing or the
	            request fails.
	        """
	        if not self.search_api_key or not self.search_engine_id:
	            print("Error searching sources: SEARCH_API_KEY and SEARCH_ENGINE_ID are required")
	            return []

	        params = {
	            'key': self.search_api_key,
	            'cx': self.search_engine_id,
	            'q': claim,
	            'num': max(1, min(int(num_sources), self.MAX_SEARCH_RESULTS))
	        }
	        try:
	            # A timeout keeps a stalled connection from blocking the UI forever.
	            response = requests.get(self.SEARCH_URL, params=params,
	                                    timeout=self.REQUEST_TIMEOUT)
	            response.raise_for_status()
	            search_results = response.json()
	        except Exception as e:
	            print(f"Error searching sources: {str(e)}")
	            return []

	        items = search_results.get('items', []) if isinstance(search_results, dict) else []
	        return [
	            item['link'] for item in items
	            if isinstance(item, dict) and item.get('link')
	        ]

	    def _fetch_webpage_content(self, url: str) -> Optional[dict]:
	        """Fetch *url* through the reader endpoint and return its text.

	        Returns:
	            A dict with ``content`` (first 5000 characters), ``title`` and the
	            raw ``data`` payload, or ``None`` when the request fails or the
	            response carries no data.
	        """
	        headers = {'Accept': 'application/json'}
	        # Only send credentials when a key is configured; "Bearer None" is
	        # not a valid token.
	        if self.jinai_api_key:
	            headers['Authorization'] = f'Bearer {self.jinai_api_key}'

	        # The base URL already ends with "/", so avoid producing "//".
	        reader_url = f"{self.jinai_reader_url.rstrip('/')}/{url}"
	        try:
	            response = requests.get(reader_url, headers=headers,
	                                    timeout=self.REQUEST_TIMEOUT)
	            response.raise_for_status()
	            data = response.json()
	        except Exception as e:
	            print(f"Error fetching {url}: {str(e)}")
	            return None

	        page = data.get('data') if isinstance(data, dict) else None
	        if not page or not isinstance(page, dict):
	            return None

	        return {
	            "content": str(page.get('content') or '')[:5000],
	            "title": page.get('title') or '',
	            "data": page
	        }

	    def _analyze_evidence(self, claim: str, sources: List[Source]) -> List[Dict]:
	        """Ask the model for evidence about *claim* in each source.

	        Every returned dict has a non-empty ``text``, a ``source`` (defaulting
	        to the source title) and a ``source_score`` copied from the source.
	        Sources whose analysis fails are logged and skipped.
	        """
	        all_evidence = []

	        for source in sources:
	            prompt = f"""
	Analyze this content and return evidence as JSON array:

	CLAIM: "{claim}"
	SOURCE TITLE: {source.title}
	CONTENT: {source.content[:2000]}

	Return array of evidence objects with properties:
	- text: exact quote or clear paraphrase
	- type: "supporting" or "contradicting"
	- relevance: number 0.0 to 1.0
	- source: source title
	"""

	            try:
	                response = self.model.generate_content(prompt)
	                raw_text = response.text
	                if not raw_text:
	                    continue
	                evidence_list = self._parse_evidence_json(raw_text)
	            except Exception as e:
	                # Best effort: one bad source must not abort the whole check.
	                print(f"Error analyzing source {source.url}: {str(e)}")
	                continue

	            for item in evidence_list:
	                # Evidence without text cannot be cited in the report.
	                if not str(item.get("text") or "").strip():
	                    continue
	                item["source_score"] = source.reputation_score
	                if not item.get("source"):
	                    item["source"] = source.title
	                all_evidence.append(item)

	        return all_evidence

	    def check_fact(self, claim: str, num_sources: int = 3) -> Optional[FactCheckResult]:
	        """Run the full fact-check pipeline for *claim*.

	        Args:
	            claim: Statement to verify.
	            num_sources: Number of search results to analyze.

	        Returns:
	            A ``FactCheckResult``, or ``None`` when the claim is blank, no
	            source could be found or fetched, or an unexpected error occurs.
	        """
	        if not claim or not claim.strip():
	            return None

	        try:
	            urls = self._search_sources(claim, num_sources)
	            if not urls:
	                return None

	            sources = []
	            for url in urls:
	                content_dict = self._fetch_webpage_content(url)
	                if content_dict:
	                    sources.append(Source(
	                        url=url,
	                        # The title key always exists but may be empty.
	                        title=content_dict.get("title") or url,
	                        content=content_dict["content"],
	                        reputation_score=self.DEFAULT_REPUTATION_SCORE
	                    ))

	            if not sources:
	                return None

	            evidence = self._analyze_evidence(claim, sources)

	            supporting = [e for e in evidence if self._evidence_kind(e) == "supporting"]
	            contradicting = [e for e in evidence if self._evidence_kind(e) == "contradicting"]

	            verdict, confidence, explanation = self._derive_verdict(
	                evidence, supporting, contradicting
	            )

	            return FactCheckResult(
	                claim=claim,
	                verdict=verdict,
	                confidence_score=confidence,
	                analysis_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	                sources=sources,
	                evidence=supporting,
	                contradictions=contradicting,
	                explanation=explanation
	            )

	        except Exception as e:
	            print(f"Error during fact checking: {str(e)}")
	            return None

	def format_fact_check_report(result: FactCheckResult, max_items: int = 3) -> str:
	    """Render a fact-check result as a Markdown report.

	    Args:
	        result: The fact-check result to render.
	        max_items: Maximum number of supporting and of contradicting evidence
	            entries to list. Defaults to 3.

	    Returns:
	        The report as a Markdown string.
	    """

	    def _evidence_bullet(item: dict) -> str:
	        # Evidence dicts come from parsed model output, so keys may be
	        # missing; fall back to placeholders instead of raising KeyError.
	        # Whitespace is collapsed so a multi-line quote stays one bullet.
	        quote = " ".join(str(item.get('text') or "(no text provided)").split())
	        origin = item.get('source') or "Unknown source"
	        return f"- {quote} (Source: {origin})\n"

	    limit = max(0, max_items)

	    header = f"""# Fact Check Report

	## Claim
	"{result.claim}"

	## Verdict: {result.verdict}
	Confidence Score: {result.confidence_score:.2f}

	## Explanation
	{result.explanation}

	## Analysis Summary
	- Number of sources analyzed: {len(result.sources)}
	- Supporting evidence found: {len(result.evidence)}
	- Contradicting points found: {len(result.contradictions)}

	## Sources Analyzed
	"""
	    parts = [header]

	    # An empty title would render as an invisible link, so show the URL.
	    parts.extend(
	        f"- [{source.title or source.url}]({source.url}) "
	        f"(Credibility: {source.reputation_score:.2f})\n"
	        for source in result.sources
	    )

	    if result.evidence:
	        parts.append("\n### Supporting Evidence:\n")
	        parts.extend(_evidence_bullet(e) for e in result.evidence[:limit])

	    if result.contradictions:
	        parts.append("\n### Contradicting Points:\n")
	        parts.extend(_evidence_bullet(c) for c in result.contradictions[:limit])

	    return "".join(parts)

	def main():
	    """Build the Gradio interface for the fact checker and launch it.

	    Environment variables are loaded from a ``.env`` file before the
	    fact checker is created.
	    """
	    load_dotenv()
	    fact_checker = GeminiFactChecker()

	    def check_fact_wrapper(claim: str, num_sources: int):
	        """Yield a progress update first, then the final status and report."""
	        yield "🔍 Searching and analyzing sources...", ""

	        try:
	            outcome = fact_checker.check_fact(claim, int(num_sources))
	            if not outcome:
	                final = ("❌ Error occurred", "Error occurred during fact checking.")
	            else:
	                final = ("✅ Analysis complete!", format_fact_check_report(outcome))
	        except Exception as err:
	            final = ("❌ Error occurred", f"Error: {str(err)}")

	        yield final

	    with gr.Blocks() as demo:
	        gr.Markdown("# AI-Powered Fact Checker")
	        gr.Markdown("Enter a claim to check its veracity against multiple sources.")

	        with gr.Row():
	            # Left column: inputs.
	            with gr.Column():
	                claim_box = gr.Textbox(
	                    lines=3,
	                    label="Claim to Check",
	                    placeholder="Enter the claim you want to verify...",
	                )
	                sources_slider = gr.Slider(
	                    minimum=1,
	                    maximum=5,
	                    step=1,
	                    value=3,
	                    label="Number of Sources to Check",
	                )
	                check_button = gr.Button("Check Claim", variant="primary")

	            # Right column: outputs.
	            with gr.Column():
	                status_view = gr.Markdown("Ready to check claims...")
	                report_view = gr.Markdown()

	        check_button.click(
	            fn=check_fact_wrapper,
	            inputs=[claim_box, sources_slider],
	            outputs=[status_view, report_view],
	            show_progress=True,
	        )

	    demo.launch()

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    main()