Commit b3fc910
Parent(s): f37a598

Fix: Make app.py safe for Hugging Face Spaces - no model load at import time
app.py CHANGED
@@ -1,23 +1,97 @@
(Old version, 23 lines: not fully recoverable from this capture. The visible fragments show the same `import gradio as gr`, a handler whose docstring began `"""Generate`, the identical empty-question guard, and a `gr.Interface(` built with inline `fn=` and `inputs=gr.Textbox(` arguments; per the commit message, it loaded the model at import time.)

The new app.py (97 lines):
```python
import gradio as gr
import os
import httpx
from typing import Optional

# If an external inference endpoint is configured, the Gradio app will proxy to it.
# Otherwise, if DEMO_MODE is set, the app will run in safe demo mode that does not
# attempt to load large model weights. When neither is set, the app will try to
# instantiate the local CognitiveLLM (for developers running locally with the model).
INFERENCE_API_URL = os.getenv("INFERENCE_API_URL")
INFERENCE_API_KEY = os.getenv("INFERENCE_API_KEY")
DEMO_MODE = os.getenv("DEMO_MODE", "0").lower() in ("1", "true", "yes")
```
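On a Space these would normally be set as repository secrets or variables; for a local check they can be exported before import, since the module-level `os.getenv()` calls run once at import time. A minimal sketch (not part of the commit, and assuming this file is saved as `app.py` on the import path):

```python
# Hypothetical smoke test: enable demo mode before app.py is imported,
# because the environment is read at import time.
import os
os.environ["DEMO_MODE"] = "1"

import app  # safe: importing does not load any model weights

print(app.DEMO_MODE)                       # True
print(app.ask_sync("What is recursion?"))  # served by _demo_reply
```

The demo responder that this flag enables routes on simple keyword matches: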
```python
def _demo_reply(prompt: str) -> str:
    """Generate deterministic demo responses."""
    p = (prompt or "").strip()
    if not p:
        return "Please enter a question for the demo tutor."
    if "explain" in p.lower() or "what is" in p.lower():
        return f"**Demo Explanation:**\n\nHere's a concise explanation for your question: *\"{p}\"*.\n\n[Demo mode active. Configure `INFERENCE_API_URL` to use a real model.]"
    if "code" in p.lower() or "how to" in p.lower() or "implement" in p.lower():
        return f"**Demo Steps:**\n\n1. Understand the problem: *\"{p}\"*\n2. Break it down into smaller steps\n3. Implement and test\n4. Iterate and refine\n\n[Demo-mode response]"
    if "compare" in p.lower() or "difference" in p.lower():
        return f"**Demo Comparison:**\n\nKey differences related to *\"{p}\"*:\n- Point A vs Point B\n- Tradeoffs and use cases\n\n[Demo mode]"
    return f"**Demo Response:**\n\nI understood your prompt: *\"{p}\"*.\n\nThis is a demo response showing how the tutor would reply. Set `INFERENCE_API_URL` to enable real model inference."
```
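A quick illustration of that routing (again a hypothetical snippet, same import assumption as above):

```python
from app import _demo_reply

print(_demo_reply("What is recursion?"))        # "what is"  -> Demo Explanation
print(_demo_reply("How to implement a stack?")) # "how to"   -> Demo Steps
print(_demo_reply("Compare TCP and UDP"))       # "compare"  -> Demo Comparison
print(_demo_reply(""))                          # empty      -> asks for a question
```

ask_sync() then ties the three modes together: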
```python
def ask_sync(question: str) -> str:
    """Handle question answering with demo mode, inference API, or local model fallback."""
    if not question or not question.strip():
        return "Please enter a question for the tutor."

    question = question.strip()

    # Try inference API first if configured
    if INFERENCE_API_URL:
        try:
            headers = {"Content-Type": "application/json"}
            if INFERENCE_API_KEY:
                headers["Authorization"] = f"Bearer {INFERENCE_API_KEY}"

            resp = httpx.post(
                INFERENCE_API_URL,
                json={"inputs": question},
                headers=headers,
                timeout=60.0
            )
            resp.raise_for_status()
            data = resp.json()

            # Normalize response
            if isinstance(data, list) and len(data) > 0:
                first = data[0]
                if isinstance(first, dict) and "generated_text" in first:
                    return first["generated_text"]
                if isinstance(first, str):
                    return first
            if isinstance(data, dict) and "generated_text" in data:
                return data["generated_text"]
            return str(data)
        except Exception as e:
            return f"⚠️ Inference API error: {e}\n\nFalling back to demo mode..."

    # Demo mode
    if DEMO_MODE:
        return _demo_reply(question)

    # Fallback to local model (only for developers with model weights)
    try:
        from cognitive_llm import CognitiveLLM
        tutor = CognitiveLLM()
        return tutor.generate(question)
    except Exception as e:
        return f"❌ Local model unavailable and no `INFERENCE_API_URL` configured.\n\nError: {e}\n\n**To fix this:**\n- Set `DEMO_MODE=1` for demo responses, or\n- Set `INFERENCE_API_URL` to use a hosted inference endpoint"
```
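The normalization branch accepts the shapes commonly returned by hosted text-generation endpoints: a list of `{"generated_text": ...}` dicts, a list of strings, or a single dict. A hypothetical check with the HTTP call faked out (the endpoint URL is a placeholder, not a real service):

```python
import os
os.environ["INFERENCE_API_URL"] = "https://example.com/generate"  # placeholder

from unittest import mock
import app

for payload in ([{"generated_text": "list-of-dicts reply"}],
                ["bare-string reply"],
                {"generated_text": "dict reply"}):
    fake_resp = mock.Mock()                 # stands in for the httpx.Response
    fake_resp.json.return_value = payload
    with mock.patch.object(app.httpx, "post", return_value=fake_resp):
        print(app.ask_sync("test question"))  # prints the normalized text
```

Note that the except branch, despite saying "Falling back to demo mode...", returns the error text directly rather than calling _demo_reply(). The Gradio interface then wires the handler up: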
```python
iface = gr.Interface(
    fn=ask_sync,
    inputs=gr.Textbox(
        label="Ask the tutor",
        placeholder="Enter your question here (e.g., 'Explain Newton's laws')",
        lines=3
    ),
    outputs=gr.Textbox(label="Tutor response", lines=10),
    title="🧠 Eidolon Cognitive Tutor",
    description="Interactive tutor demo. Running in **demo mode** by default (set `DEMO_MODE=1` or configure `INFERENCE_API_URL` for real inference).",
    examples=[
        ["Explain Newton's laws in simple terms"],
        ["How do I implement a binary search in Python?"],
        ["Compare supervised vs unsupervised learning"],
        ["What is the difference between HTTP and HTTPS?"]
    ],
    theme="soft"
)
```
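The hunk stops at line 97 without an explicit entry point. If the file really ends here, a plain `python app.py` run would build the interface and exit without serving; a local run would typically append something like the following (a sketch, not part of the commit; some Spaces SDK setups discover and launch the interface themselves):

```python
# Hypothetical entry point for local runs; not shown in this diff.
if __name__ == "__main__":
    iface.launch()
```

One small mismatch worth flagging: the description advertises demo mode as the default, but with neither `DEMO_MODE` nor `INFERENCE_API_URL` set, the code actually falls through to the local-model path.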