feat: Add paper citation system (backend DB + API wiring) and UI research panel
c678388
"""Lightweight paper database and lookup helpers for research citations."""
from typing import List, Dict
# Minimal curated set of influential papers to cite in the demo. Add more as needed.
RESEARCH_PAPERS: Dict[str, Dict] = {
    "attention": {
        "id": "attention",
        "title": "Attention Is All You Need",
        "authors": "Vaswani et al.",
        "year": 2017,
        "venue": "NeurIPS",
        "url": "https://arxiv.org/abs/1706.03762",
        "summary": "Introduced the Transformer architecture using self-attention; foundational for modern LLMs."
    },
    "rag": {
        "id": "rag",
        "title": "Retrieval-Augmented Generation for Knowledge-Intensive NLP",
        "authors": "Lewis et al.",
        "year": 2020,
        "venue": "NeurIPS",
        "url": "https://arxiv.org/abs/2005.11401",
        "summary": "Combines retrieval with generation to ground answers in external documents."
    },
    "tot": {
        "id": "tot",
        "title": "Tree of Thoughts: Deliberate Problem Solving with Large Language Models",
        "authors": "Yao et al.",
        "year": 2023,
        "venue": "ArXiv/NeurIPS",
        "url": "https://arxiv.org/abs/2305.10601",
        "summary": "Explores branching reasoning strategies to improve complex problem solving."
    },
"maml": {
"id": "maml",
"title": "Model-Agnostic Meta-Learning for Fast Adaptation",
"authors": "Finn et al.",
"year": 2017,
"venue": "ICML",
"url": "https://arxiv.org/abs/1703.03400",
"summary": "Meta-learning method for quick adaptation to new tasks with few examples."
},
"uncertainty": {
"id": "uncertainty",
"title": "Uncertainty in Deep Learning",
"authors": "Gal",
"year": 2016,
"venue": "PhD Thesis / Review",
"url": "https://arxiv.org/abs/1708.07250",
"summary": "Surveys Bayesian and practical approaches to uncertainty estimation in neural networks."
},
"constitutional": {
"id": "constitutional",
"title": "Constitutional AI: Harmlessness from AI Feedback",
"authors": "Bai et al.",
"year": 2022,
"venue": "ArXiv",
"url": "https://arxiv.org/abs/2212.08073",
"summary": "Technique for using a set of principles to align model behavior with safety goals."
},
"dpo": {
"id": "dpo",
"title": "Direct Preference Optimization",
"authors": "Rafailov et al.",
"year": 2023,
"venue": "ArXiv",
"url": "https://arxiv.org/abs/2305.19343",
"summary": "Preference optimization method that avoids separate reward model training."
},
"gnn": {
"id": "gnn",
"title": "A Comprehensive Survey on Graph Neural Networks",
"authors": "Zhou et al.",
"year": 2020,
"venue": "IEEE Transactions",
"url": "https://arxiv.org/abs/1901.00596",
"summary": "Survey of graph neural network models and applications, relevant for knowledge graphs."
},
"clip": {
"id": "clip",
"title": "Learning Transferable Visual Models From Natural Language Supervision (CLIP)",
"authors": "Radford et al.",
"year": 2021,
"venue": "ICML",
"url": "https://arxiv.org/abs/2103.00020",
"summary": "Demonstrates strong zero-shot transfer for image-text tasks using contrastive learning."
},
"dpr": {
"id": "dpr",
"title": "Dense Passage Retrieval for Open-Domain Question Answering",
"authors": "Karpukhin et al.",
"year": 2020,
"venue": "EMNLP",
"url": "https://arxiv.org/abs/2004.04906",
"summary": "Dense retrieval technique that improves recall for RAG pipelines."
}
}
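# Example lookup (illustrative; entries are keyed by the short ids used above):
#   RESEARCH_PAPERS["rag"]["title"] -> "Retrieval-Augmented Generation for Knowledge-Intensive NLP"
#   RESEARCH_PAPERS["clip"]["year"] -> 2021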
def get_relevant_papers(prompt: str = "", mode: str = "standard", top_k: int = 3) -> List[Dict]:
    """Return a small list of relevant papers for a prompt or mode.

    Heuristic matching: check for keywords in the prompt and the requested mode,
    then return a concise, curated set of citations suitable for display in the UI.
    """
    prompt_l = (prompt or "").lower()
    keys = set()

    # Mode-based preferences
    if mode == "socratic":
        keys.add("tot")
    if mode == "technical":
        keys.add("attention")
        keys.add("dpr")
    if mode == "code" or "implement" in prompt_l:
        keys.add("dpr")
        keys.add("rag")
    if mode == "analogy":
        keys.add("attention")
    if mode == "eli5":
        keys.add("constitutional")

    # Keyword heuristics
    if any(k in prompt_l for k in ["attention", "transformer", "self-attention"]):
        keys.add("attention")
    if any(k in prompt_l for k in ["retrieval", "rag", "search", "docs", "sources"]):
        keys.add("rag")
        keys.add("dpr")
    if any(k in prompt_l for k in ["reasoning", "tree", "thought", "chain of thought"]):
        keys.add("tot")
    if any(k in prompt_l for k in ["meta", "maml", "adapt", "few-shot"]):
        keys.add("maml")
    if any(k in prompt_l for k in ["uncertainty", "confidence", "calibration"]):
        keys.add("uncertainty")
    if any(k in prompt_l for k in ["safety", "harm", "alignment", "constitutional"]):
        keys.add("constitutional")
    if any(k in prompt_l for k in ["graph", "knowledge graph", "gnn"]):
        keys.add("gnn")
    if any(k in prompt_l for k in ["image", "vision", "clip"]):
        keys.add("clip")
    if any(k in prompt_l for k in ["preference", "rlhf", "dpo"]):
        keys.add("dpo")

    # If no keys were found, return general high-impact papers
    if not keys:
        keys.update(["attention", "rag", "dpr"])

    # Build the list with a stable priority order: attention -> rag -> dpr -> others
    priority = ["attention", "rag", "dpr", "tot", "maml", "uncertainty", "constitutional", "dpo", "gnn", "clip"]
    selected = [RESEARCH_PAPERS[k] for k in priority if k in keys and k in RESEARCH_PAPERS]
    return selected[:top_k]
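
# --- Illustrative usage (a sketch, not part of the module API) ---------------
# The prompt string below is a made-up example; it only demonstrates how a
# caller might render the returned citations.
if __name__ == "__main__":
    for paper in get_relevant_papers("How does retrieval-augmented generation work?", mode="technical"):
        print(f"{paper['title']} ({paper['authors']}, {paper['year']}): {paper['url']}")

# One possible way to wire this into the backend API mentioned in the commit
# message (a sketch assuming a FastAPI `app` object; the route path and query
# parameters are hypothetical, not taken from this commit):
#
#     @app.get("/api/research/papers")
#     def research_papers(prompt: str = "", mode: str = "standard", top_k: int = 3):
#         return {"papers": get_relevant_papers(prompt, mode, top_k)}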