Update app.py
app.py CHANGED
@@ -1226,11 +1226,21 @@ class Hive:
         if torch and torch.cuda.is_available() and self.device == "cuda":
             kwargs.update(dict(torch_dtype=torch.float16))

-
+        # --- Automatic Inference Mode Switching ---
+        # Default to local inference for Pi/local machines, remote for HF Spaces.
+        # This can be manually overridden by setting HIVE_USE_HF_INFERENCE.
+        is_hf_space = "SPACE_ID" in os.environ
+        use_remote_default = is_hf_space
+
+        # Check for manual override from environment variable
+        if "HIVE_USE_HF_INFERENCE" in os.environ:
+            use_remote = CFG["HIVE_USE_HF_INFERENCE"]
+        else:
+            use_remote = use_remote_default
+
         if use_remote:
-
-            endpoint = CFG["HIVE_HF_ENDPOINT"] or None
-            token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
+            print("[Hive] Using remote Hugging Face Inference endpoint.")
+            from huggingface_hub import InferenceClient; endpoint = CFG["HIVE_HF_ENDPOINT"] or None; token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
             self.client = InferenceClient(model=self.model_id if endpoint is None else None, token=token, timeout=60, base_url=endpoint)
             def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
                 stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
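
The override branch assumes `CFG["HIVE_USE_HF_INFERENCE"]` has already been coerced to a boolean whenever the variable is set. A minimal sketch of that default-plus-override logic, with a hypothetical `env_flag` helper (not part of app.py):

```python
import os

def env_flag(name: str, default: bool) -> bool:
    # Treat common truthy strings as True; anything else as False.
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")

# Hypothetical usage mirroring the hunk above:
use_remote = env_flag("HIVE_USE_HF_INFERENCE", default="SPACE_ID" in os.environ)
```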

@@ -1242,6 +1252,7 @@ class Hive:
             self.model = None  # No local model needed
             self.stopping_criteria = None  # Not used with InferenceClient
         else:
+            print("[Hive] Using local LLM for inference.")
             self.tok = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=trust, chat_template=None)
             if self.tok.pad_token is None:
                 self.tok.pad_token = self.tok.eos_token
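
The first hunk cuts off inside `_remote_pipe` right after the stop sequences are computed. A sketch of how such a wrapper plausibly finishes, assuming it mimics a local `transformers` pipeline's return shape; `make_remote_pipe` is illustrative, and `stop_sequences` is the older `huggingface_hub` keyword (newer releases call it `stop`):

```python
from huggingface_hub import InferenceClient

def make_remote_pipe(client: InferenceClient):
    # Returns a callable with roughly the same shape as a local text-generation pipeline.
    def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
        stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
        text = client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            temperature=temperature,
            stop_sequences=stop,
        )
        return [{"generated_text": text}]
    return _remote_pipe
```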

@@ -1488,27 +1499,27 @@ def launch_ui(bootstrap_instance: "Bootstrap"):

    with gr.Blocks(title="Hive 🐝 Full Merged Optimized") as demo:
        with gr.Row():
-            with gr.Column(scale=
+            with gr.Column(scale=4):
                gr.Markdown(f"## {CFG['AGENT_NAME']} 🐝")
                core_status = gr.Markdown("⏳ **Initializing Full Hive Core...** You can chat with the Lite model now. Advanced features will be enabled shortly.")  # type: ignore
                chatbot=gr.Chatbot(height=600, type="messages")
-                msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True, show_label=False, container=False)
+                msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True, show_label=False, container=False, scale=4)

            with gr.Column(scale=1, min_width=300):
                with gr.Sidebar():
                    uid_state=gr.State(None); role_state=gr.State("guest"); mode_state=gr.State("user"); phonics_state=gr.State(False)
-                    with gr.Accordion("🧠 Memory Summary", open=False) as memory_accordion:
+                    with gr.Accordion("🧠 Memory Summary", open=False, visible=False) as memory_accordion:
                        summary_output = gr.Markdown("Click the button to generate a summary of your recent conversations.")
                        summary_btn = gr.Button("Show Memory Summary")

-                    with gr.Accordion("📚 Vocabulary Trainer", open=False) as vocab_accordion:
+                    with gr.Accordion("📚 Vocabulary Trainer", open=False, visible=False) as vocab_accordion:
                        vocab_output = gr.Markdown("Click to get a new vocabulary word from your conversations.")
                        vocab_btn = gr.Button("Get New Word")

-                    with gr.Accordion("📈 Progress Board", open=False) as progress_accordion:
+                    with gr.Accordion("📈 Progress Board", open=False, visible=False) as progress_accordion:
                        progress_output = gr.Markdown("Your progress will be shown here.")

-                    with gr.Accordion("Settings & Admin Controls", open=
+                    with gr.Accordion("Settings & Admin Controls", open=True) as settings_accordion:
                        with gr.Tabs():
                            with gr.TabItem("Login & Profile") as login_tab:
                                with gr.Row():

@@ -1588,37 +1599,42 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
                            propose_btn=gr.Button("Propose"); test_btn=gr.Button("Test in sandbox"); apply_btn=gr.Button("Apply (policy-checked)", elem_id="apply_btn")
                            opt_out=gr.JSON(label="Result")

-                    with gr.Accordion("Vocal Chat (Hands-Free)", open=False) as vocal_accordion:
+                    with gr.Accordion("Vocal Chat (Hands-Free)", open=False, visible=False) as vocal_accordion:
                        vocal_chat_state = gr.State({"active": False, "user_audio": None, "last_chunk_time": 0})
                        vocal_chat_btn = gr.Button("Start Vocal Chat")
                        vocal_chat_status = gr.Markdown("Status: Inactive")
                        vocal_mic = gr.Audio(sources=["microphone"], streaming=True, visible=False, autoplay=True)

-                    with gr.Accordion("Help", open=
+                    with gr.Accordion("Help", open=True) as help_accordion:
                        gr.Markdown(HELP)

-        # --- Event Handlers ---
+        # --- Event Handlers ---

        def _sanitize_input(text: str) -> str:
            """Removes control characters and leading/trailing whitespace."""
            if not text: return ""
            return "".join(ch for ch in text if unicodedata.category(ch)[0] != "C").strip()

-        def talk(m, uid, role, mode, hist): # type: ignore
-            hive_instance = get_hive_instance() # type: ignore
+        def talk(m, uid, role, mode, hist, request: gr.Request): # type: ignore
            eff = role if mode == "admin" else "user"

            sanitized_m = _sanitize_input(m)
            if not sanitized_m:
                hist.append([m, "Please provide a message."])
-                yield hist, ""
+                yield hist, gr.Textbox(value="")
                return

            hist.append([sanitized_m, ""])
-            yield hist, "" # Clear input box immediately
+            yield hist, gr.Textbox(value="") # Clear input box immediately

-
+            hive_instance = get_hive_instance() # Get the correct instance for every message
            final_message, intent = hive_instance._prepare_chat_input(sanitized_m, "en", False, None)
+
+            def _prepare_chat_logic(m: str) -> tuple[str, int]:
+                """Prepares logic for chat, including token sizing."""
+                intent = "essay_review" if len(m.split()) > 50 else "tutor"
+                max_tokens = 512 if intent == "essay_review" else 256
+                return intent, max_tokens
            _, max_tokens = _prepare_chat_logic(sanitized_m) # Keep this for token sizing

            # --- Streaming Response ---
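
`talk` is a generator, so every `yield` pushes a fresh `(chatbot, textbox)` update to the browser, and returning `gr.Textbox(value="")` patches just the value of the live component. The pattern in isolation, with `stream_reply` and the hard-coded token list as stand-ins for `talk` and `chat_stream`:

```python
import gradio as gr

def stream_reply(message, history):
    # A generator event handler: every yield re-renders the declared outputs.
    history = history + [[message, ""]]
    yield history, gr.Textbox(value="")                # clear the input box right away
    for token in ["Hello", " from", " a", " stream"]:  # stand-in for chat_stream(...)
        history[-1][1] += token
        yield history, gr.Textbox(value="")            # show the partial reply

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    box = gr.Textbox()
    box.submit(stream_reply, [box, chat], [chat, box])

demo.launch()
```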

@@ -1627,7 +1643,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
            for token in hive_instance.chat_stream(full_prompt, max_new_tokens=max_tokens, temperature=0.7):
                full_reply += token
                hist[-1][1] = full_reply
-                yield hist, ""
+                yield hist, gr.Textbox(value="")

            # This block should only run in full mode.
            if not hive_instance.lite_mode:

@@ -1640,8 +1656,8 @@ def launch_ui(bootstrap_instance: "Bootstrap"):

            # Final update to history
            hist[-1][1] = hive_instance._postprocess_and_log(full_reply, sanitized_m, eff, uid, intent, [])
-            yield hist, ""
-        msg.submit(talk, [msg, uid_state, role_state, mode_state, chatbot], [chatbot, msg])
+            yield hist, gr.Textbox(value="")
+        msg.submit(talk, [msg, uid_state, role_state, mode_state, chatbot], [chatbot, msg], api_name="chat")

        def do_memory_summary(uid):
            hive_instance = get_hive_instance()
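
Naming the submit event (`api_name="chat"`) also publishes it as a callable endpoint in the app's generated API docs. A quick way to confirm from a client, assuming the app is served at the default local URL:

```python
from gradio_client import Client

# Placeholder URL for a default local demo.launch().
client = Client("http://localhost:7860/")
client.view_api()  # the talk handler should now be listed under /chat
```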

@@ -1681,15 +1697,15 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
            get_hive_instance()
            return (
                "✅ **Full Hive Core is Ready.**",
-                gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True),
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
+                gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True, visible=True),
+                gr.Accordion(visible=True),  # memory
+                gr.Accordion(visible=True),  # vocab
+                gr.Accordion(visible=True),  # progress
+                gr.Accordion(visible=True),  # settings
+                gr.Accordion(visible=True),  # vocal
+                gr.Accordion(visible=True),  # help
            )
-        demo.load(wait_for_hive_core, [], [core_status, msg, memory_accordion, vocab_accordion, progress_accordion, settings_accordion, vocal_accordion, help_accordion])
+        demo.load(wait_for_hive_core, [], [core_status, msg, memory_accordion, vocab_accordion, progress_accordion, settings_accordion, vocal_accordion, help_accordion], api_name="wait_for_hive_core")

        def do_login(nm,pw,sec):
            ok, info=attempt_login(nm or "", pw or "", sec or None)
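
The accordions are now built with `visible=False` and revealed later by `wait_for_hive_core`, which returns partially specified components. The hide-then-reveal pattern reduced to a minimal, runnable sketch:

```python
import gradio as gr

with gr.Blocks() as demo:
    status = gr.Markdown("Loading...")
    with gr.Accordion("Extras", open=False, visible=False) as panel:  # hidden until ready
        gr.Markdown("Advanced controls live here.")

    def reveal():
        # Returning a component with only the changed properties patches the
        # live component in place (gr.update(visible=True) also works).
        return "Ready.", gr.Accordion(visible=True)

    demo.load(reveal, [], [status, panel])

demo.launch()
```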

@@ -1918,16 +1934,16 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
        # This function has a potential issue if get_hive_instance() returns a lite instance.
        def compress_memory(h):
            if h.lite_mode or not hasattr(h, 'store'):
-                return "Memory compression is
+                return "Memory compression is not available until the Full Hive Core is ready."
            ok,msg= _archive_memory(str(h.store.dir))
            return msg
        mem_compress_btn.click(lambda: compress_memory(get_hive_instance()), [], [compress_status])

        def do_hotpatch(patch_json):
            try: patch=json.loads(patch_json)
-            except Exception: return "
+            except Exception as e: return f"Invalid JSON: {e}"
            hive_instance = get_hive_instance()
-            if hive_instance.lite_mode or not hasattr(hive_instance, 'overlay'): return "Hotpatching is
+            if hive_instance.lite_mode or not hasattr(hive_instance, 'overlay'): return "Hotpatching is not available until the Full Hive Core is ready."
            ok,msg=hive_instance.overlay.patch(patch,get_hive_instance()) # type: ignore
            return msg
        hotpatch_apply.click(do_hotpatch,[hotpatch_patch],[hotpatch_status])
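
The tightened `except` now reports what failed to parse instead of swallowing it. The same guard factored into a standalone helper; `parse_patch` is hypothetical, shown only to illustrate the parse-then-apply seam:

```python
import json

def parse_patch(patch_json: str):
    # Returns (patch, error); error is None when parsing succeeds.
    try:
        return json.loads(patch_json), None
    except (json.JSONDecodeError, TypeError) as e:
        return None, f"Invalid JSON: {e}"

patch, err = parse_patch('{"target": "greeting", "value": "hello"}')
assert err is None and patch["target"] == "greeting"
```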

@@ -1970,6 +1986,7 @@ class Bootstrap:
        self.hive_instance: Optional[Hive] = None
        self.hive_lite_instance: Optional[Hive] = None
        self.hive_ready = threading.Event()
+        self.voice_ready = threading.Event()
        self.env: Optional[Dict] = None
        self.app: Optional[gr.Blocks] = None
        self.ui_thread: Optional[threading.Thread] = None

@@ -2011,6 +2028,14 @@ class Bootstrap:
        self.hive_lite_instance = Hive(lite=True)
        print("[Bootstrap] Lite Hive core is ready.")

+        # Pre-load voice models in a separate thread
+        def voice_init_task():
+            print("[Bootstrap] Initializing Voice models (ASR/TTS) in background...")
+            get_asr()
+            get_tts(CFG["TTS_LANG"])
+            self.voice_ready.set()
+            print("[Bootstrap] Voice models are ready.")
+
        # Now initialize the full instance in the background. This is the slow part.
        def full_init_task():
            print("[Bootstrap] Initializing Full Hive core in background...")
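
`voice_ready` mirrors `hive_ready`: each background task flips its own `threading.Event`, so consumers can block or poll without holding a lock. A reduced, runnable sketch of the arrangement (`MiniBootstrap` is illustrative, not the real class):

```python
import threading, time

class MiniBootstrap:
    def __init__(self):
        self.hive_ready = threading.Event()
        self.voice_ready = threading.Event()

    def start(self):
        # Each slow init runs on its own daemon thread and flips its flag when done.
        def full_init_task():
            time.sleep(0.1)            # stand-in for loading the full model
            self.hive_ready.set()

        def voice_init_task():
            time.sleep(0.1)            # stand-in for get_asr() / get_tts(...)
            self.voice_ready.set()

        threading.Thread(target=full_init_task, daemon=True).start()
        threading.Thread(target=voice_init_task, daemon=True).start()

boot = MiniBootstrap()
boot.start()
boot.hive_ready.wait(timeout=5)        # consumers block (or poll is_set()) until ready
print("full core ready:", boot.hive_ready.is_set())
```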

@@ -2023,6 +2048,7 @@ class Bootstrap:
        self.ui_thread = threading.Thread(target=self.launch, daemon=True)

        threading.Thread(target=full_init_task, daemon=True).start()
+        threading.Thread(target=voice_init_task, daemon=True).start()

        # Register graceful shutdown handler and launch the UI in the main thread
        import signal
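
The last hunk ends at the `import signal` that backs the graceful-shutdown comment. What such a handler typically looks like, as a sketch; the actual handler body isn't in this diff, and `graceful_shutdown` is a hypothetical name:

```python
import signal
import sys

def graceful_shutdown(signum, frame):
    # Flush state / stop worker threads here before exiting.
    print(f"Received signal {signum}, shutting down.")
    sys.exit(0)

signal.signal(signal.SIGINT, graceful_shutdown)
signal.signal(signal.SIGTERM, graceful_shutdown)
```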