"""Lightweight paper database and lookup helpers for research citations."""
from typing import List, Dict

# Minimal curated set of influential papers to cite in the demo. Add more as needed.
RESEARCH_PAPERS: Dict[str, Dict] = {
    "attention": {
        "id": "attention",
        "title": "Attention Is All You Need",
        "authors": "Vaswani et al.",
        "year": 2017,
        "venue": "NeurIPS",
        "url": "https://arxiv.org/abs/1706.03762",
        "summary": "Introduced the Transformer architecture using self-attention; foundational for modern LLMs."
    },
    "rag": {
        "id": "rag",
        "title": "Retrieval-Augmented Generation for Knowledge-Intensive NLP",
        "authors": "Lewis et al.",
        "year": 2020,
        "venue": "NeurIPS",
        "url": "https://arxiv.org/abs/2005.11401",
        "summary": "Combines retrieval with generation to ground answers in external documents."
    },
    "tot": {
        "id": "tot",
        "title": "Tree of Thoughts: Deliberate Problem Solving with Large Language Models",
        "authors": "Yao et al.",
        "year": 2023,
        "venue": "ArXiv/NeurIPS",
        "url": "https://arxiv.org/abs/2305.10601",
        "summary": "Explores branching reasoning strategies to improve complex problem solving."
    },
    "maml": {
        "id": "maml",
        "title": "Model-Agnostic Meta-Learning for Fast Adaptation",
        "authors": "Finn et al.",
        "year": 2017,
        "venue": "ICML",
        "url": "https://arxiv.org/abs/1703.03400",
        "summary": "Meta-learning method for quick adaptation to new tasks with few examples."
    },
    "uncertainty": {
        "id": "uncertainty",
        "title": "Uncertainty in Deep Learning",
        "authors": "Gal",
        "year": 2016,
        "venue": "PhD Thesis / Review",
        "url": "https://arxiv.org/abs/1708.07250",
        "summary": "Surveys Bayesian and practical approaches to uncertainty estimation in neural networks."
    },
    "constitutional": {
        "id": "constitutional",
        "title": "Constitutional AI: Harmlessness from AI Feedback",
        "authors": "Bai et al.",
        "year": 2022,
        "venue": "ArXiv",
        "url": "https://arxiv.org/abs/2212.08073",
        "summary": "Technique for using a set of principles to align model behavior with safety goals."
    },
    "dpo": {
        "id": "dpo",
        "title": "Direct Preference Optimization",
        "authors": "Rafailov et al.",
        "year": 2023,
        "venue": "ArXiv",
        "url": "https://arxiv.org/abs/2305.19343",
        "summary": "Preference optimization method that avoids separate reward model training."
    },
    "gnn": {
        "id": "gnn",
        "title": "A Comprehensive Survey on Graph Neural Networks",
        "authors": "Zhou et al.",
        "year": 2020,
        "venue": "IEEE Transactions",
        "url": "https://arxiv.org/abs/1901.00596",
        "summary": "Survey of graph neural network models and applications, relevant for knowledge graphs."
    },
    "clip": {
        "id": "clip",
        "title": "Learning Transferable Visual Models From Natural Language Supervision (CLIP)",
        "authors": "Radford et al.",
        "year": 2021,
        "venue": "ICML",
        "url": "https://arxiv.org/abs/2103.00020",
        "summary": "Demonstrates strong zero-shot transfer for image-text tasks using contrastive learning."
    },
    "dpr": {
        "id": "dpr",
        "title": "Dense Passage Retrieval for Open-Domain Question Answering",
        "authors": "Karpukhin et al.",
        "year": 2020,
        "venue": "EMNLP",
        "url": "https://arxiv.org/abs/2004.04906",
        "summary": "Dense retrieval technique that improves recall for RAG pipelines."
    }
}
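

# Hypothetical display helper (not part of the original module): a minimal sketch of how a
# RESEARCH_PAPERS entry could be rendered as a one-line citation string for the UI.
def format_citation(paper: Dict) -> str:
    """Format a paper record as a short, human-readable citation."""
    return f"{paper['authors']} ({paper['year']}). {paper['title']}. {paper['venue']}. {paper['url']}"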


def get_relevant_papers(prompt: str = "", mode: str = "standard", top_k: int = 3) -> List[Dict]:
    """Return a small list of relevant papers for a prompt or mode.

    Heuristic matching: checks for keywords in the prompt and preferred mode
    to return a concise, curated set of citations suitable for display in the UI.
    """
    prompt_l = (prompt or "").lower()
    keys = set()

    # Mode-based preferences
    if mode == "socratic":
        keys.add("tot")
    if mode == "technical":
        keys.add("attention")
        keys.add("dpr")
    if mode == "code" or "implement" in prompt_l:
        keys.add("dpr")
        keys.add("rag")
    if mode == "analogy":
        keys.add("attention")
    if mode == "eli5":
        keys.add("constitutional")

    # Keyword heuristics
    if any(k in prompt_l for k in ["attention", "transformer", "self-attention"]):
        keys.add("attention")
    if any(k in prompt_l for k in ["retrieval", "rag", "search", "docs", "sources"]):
        keys.add("rag")
        keys.add("dpr")
    if any(k in prompt_l for k in ["reasoning", "tree", "thought", "chain of thought"]):
        keys.add("tot")
    if any(k in prompt_l for k in ["meta", "maml", "adapt", "few-shot"]):
        keys.add("maml")
    if any(k in prompt_l for k in ["uncertainty", "confidence", "calibration"]):
        keys.add("uncertainty")
    if any(k in prompt_l for k in ["safety", "harm", "alignment", "constitutional"]):
        keys.add("constitutional")
    if any(k in prompt_l for k in ["graph", "knowledge graph", "gnn"]):
        keys.add("gnn")
    if any(k in prompt_l for k in ["image", "vision", "clip"]):
        keys.add("clip")

    # If no keys were found, return general high-impact papers
    if not keys:
        keys.update(["attention", "rag", "dpr"])

    # Build list preserving some priority: attention -> rag -> dpr -> others
    priority = ["attention", "rag", "dpr", "tot", "maml", "uncertainty", "constitutional", "dpo", "gnn", "clip"]
    selected = [RESEARCH_PAPERS[k] for k in priority if k in keys and k in RESEARCH_PAPERS]

    return selected[:top_k]
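

# Illustrative usage sketch (assumption: this file is run directly as a quick smoke test;
# the prompts and modes below are made-up examples, not part of the original module).
if __name__ == "__main__":
    examples = [
        ("How does self-attention work in a transformer?", "technical"),
        ("Can you explain retrieval-augmented generation?", "standard"),
        ("", "socratic"),
    ]
    for prompt, mode in examples:
        print(f"prompt={prompt!r} mode={mode!r}")
        for paper in get_relevant_papers(prompt, mode=mode, top_k=2):
            print(f"  - {paper['title']} ({paper['authors']}, {paper['year']})")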