BonelliLab committed
Commit b3fc910 · 1 Parent(s): f37a598

Fix: Make app.py safe for Hugging Face Spaces - no model load at import time

Files changed (1)
  1. app.py +85 -11
app.py CHANGED
@@ -1,23 +1,97 @@
 import gradio as gr
-from cognitive_llm import CognitiveLLM
+import os
+import httpx
+from typing import Optional
 
-# Initialize the cognitive tutor
-_tutor = CognitiveLLM()
+# If an external inference endpoint is configured, the Gradio app will proxy to it.
+# Otherwise, if DEMO_MODE is set, the app will run in safe demo mode that does not
+# attempt to load large model weights. When neither is set, the app will try to
+# instantiate the local CognitiveLLM (for developers running locally with the model).
+INFERENCE_API_URL = os.getenv("INFERENCE_API_URL")
+INFERENCE_API_KEY = os.getenv("INFERENCE_API_KEY")
+DEMO_MODE = os.getenv("DEMO_MODE", "0").lower() in ("1", "true", "yes")
 
 
-def ask(question: str) -> str:
-    """Generate a response from the cognitive tutor."""
+def _demo_reply(prompt: str) -> str:
+    """Generate deterministic demo responses."""
+    p = (prompt or "").strip()
+    if not p:
+        return "Please enter a question for the demo tutor."
+    if "explain" in p.lower() or "what is" in p.lower():
+        return f"**Demo Explanation:**\n\nHere's a concise explanation for your question: *\"{p}\"*.\n\n[Demo mode active. Configure `INFERENCE_API_URL` to use a real model.]"
+    if "code" in p.lower() or "how to" in p.lower() or "implement" in p.lower():
+        return f"**Demo Steps:**\n\n1. Understand the problem: *\"{p}\"*\n2. Break it down into smaller steps\n3. Implement and test\n4. Iterate and refine\n\n[Demo-mode response]"
+    if "compare" in p.lower() or "difference" in p.lower():
+        return f"**Demo Comparison:**\n\nKey differences related to *\"{p}\"*:\n- Point A vs Point B\n- Tradeoffs and use cases\n\n[Demo mode]"
+    return f"**Demo Response:**\n\nI understood your prompt: *\"{p}\"*.\n\nThis is a demo response showing how the tutor would reply. Set `INFERENCE_API_URL` to enable real model inference."
+
+
+def ask_sync(question: str) -> str:
+    """Handle question answering with demo mode, inference API, or local model fallback."""
     if not question or not question.strip():
         return "Please enter a question for the tutor."
-    return _tutor.generate(question.strip())
+
+    question = question.strip()
+
+    # Try inference API first if configured
+    if INFERENCE_API_URL:
+        try:
+            headers = {"Content-Type": "application/json"}
+            if INFERENCE_API_KEY:
+                headers["Authorization"] = f"Bearer {INFERENCE_API_KEY}"
+
+            resp = httpx.post(
+                INFERENCE_API_URL,
+                json={"inputs": question},
+                headers=headers,
+                timeout=60.0
+            )
+            resp.raise_for_status()
+            data = resp.json()
+
+            # Normalize response
+            if isinstance(data, list) and len(data) > 0:
+                first = data[0]
+                if isinstance(first, dict) and "generated_text" in first:
+                    return first["generated_text"]
+                if isinstance(first, str):
+                    return first
+            if isinstance(data, dict) and "generated_text" in data:
+                return data["generated_text"]
+            return str(data)
+        except Exception as e:
+            return f"⚠️ Inference API error: {e}\n\nFalling back to demo mode..."
+
+    # Demo mode
+    if DEMO_MODE:
+        return _demo_reply(question)
+
+    # Fallback to local model (only for developers with model weights)
+    try:
+        from cognitive_llm import CognitiveLLM
+        tutor = CognitiveLLM()
+        return tutor.generate(question)
+    except Exception as e:
+        return f"❌ Local model unavailable and no `INFERENCE_API_URL` configured.\n\nError: {e}\n\n**To fix this:**\n- Set `DEMO_MODE=1` for demo responses, or\n- Set `INFERENCE_API_URL` to use a hosted inference endpoint"
 
 
 iface = gr.Interface(
-    fn=ask,
-    inputs=gr.Textbox(label="Ask the tutor", placeholder="Enter your question here"),
-    outputs=gr.Textbox(label="Tutor response"),
-    title="Eidolon Cognitive Tutor",
-    description="Retrieval-augmented cognitive tutoring powered by Qwen3."
+    fn=ask_sync,
+    inputs=gr.Textbox(
+        label="Ask the tutor",
+        placeholder="Enter your question here (e.g., 'Explain Newton's laws')",
+        lines=3
+    ),
+    outputs=gr.Textbox(label="Tutor response", lines=10),
+    title="🧠 Eidolon Cognitive Tutor",
+    description="Interactive tutor demo. Running in **demo mode** by default (set `DEMO_MODE=1` or configure `INFERENCE_API_URL` for real inference).",
+    examples=[
+        ["Explain Newton's laws in simple terms"],
+        ["How do I implement a binary search in Python?"],
+        ["Compare supervised vs unsupervised learning"],
+        ["What is the difference between HTTP and HTTPS?"]
+    ],
+    theme="soft"
 )
 
 
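Note on the lazy import: the only interface the new fallback path relies on is that cognitive_llm.CognitiveLLM can be constructed with no arguments and exposes generate(prompt) returning a string. A minimal local smoke test for "no model load at import time" might look like the sketch below; the stub module, file name smoke_test.py, and canned reply are hypothetical illustrations, not part of this commit.

# smoke_test.py -- hypothetical check (not in the commit) that app.py imports without weights
import sys
import types


class _StubCognitiveLLM:
    """Stand-in for cognitive_llm.CognitiveLLM: no-arg constructor, generate(str) -> str,
    which is the only interface the lazy import inside ask_sync() relies on."""

    def generate(self, prompt: str) -> str:
        return f"[stub reply] {prompt}"


# Register a fake cognitive_llm module so the lazy `from cognitive_llm import CognitiveLLM`
# inside ask_sync() resolves to the stub instead of loading real model weights.
_fake = types.ModuleType("cognitive_llm")
_fake.CognitiveLLM = _StubCognitiveLLM
sys.modules["cognitive_llm"] = _fake

import app  # should succeed with no GPU or weights, since nothing heavy loads at import time

print(app.ask_sync(""))                        # input-validation message
print(app.ask_sync("Explain Newton's laws"))   # stub reply, assuming INFERENCE_API_URL and DEMO_MODE are unset

Running this with DEMO_MODE=1 or with INFERENCE_API_URL pointing at a hosted endpoint would exercise the other two branches of ask_sync() instead of the stub.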