Paulhayes committed on
Commit e32d954 · verified · 1 Parent(s): 10e137f

Update app.py

Files changed (1)
  1. app.py +58 -32
app.py CHANGED
@@ -1226,11 +1226,21 @@ class Hive:
         if torch and torch.cuda.is_available() and self.device == "cuda":
             kwargs.update(dict(torch_dtype=torch.float16))
 
-        use_remote = CFG["HIVE_USE_HF_INFERENCE"]
+        # --- Automatic Inference Mode Switching ---
+        # Default to local inference for Pi/local machines, remote for HF Spaces.
+        # This can be manually overridden by setting HIVE_USE_HF_INFERENCE.
+        is_hf_space = "SPACE_ID" in os.environ
+        use_remote_default = is_hf_space
+
+        # Check for manual override from environment variable
+        if "HIVE_USE_HF_INFERENCE" in os.environ:
+            use_remote = CFG["HIVE_USE_HF_INFERENCE"]
+        else:
+            use_remote = use_remote_default
+
         if use_remote:
-            from huggingface_hub import InferenceClient
-            endpoint = CFG["HIVE_HF_ENDPOINT"] or None
-            token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
+            print("[Hive] Using remote Hugging Face Inference endpoint.")
+            from huggingface_hub import InferenceClient; endpoint = CFG["HIVE_HF_ENDPOINT"] or None; token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
             self.client = InferenceClient(model=self.model_id if endpoint is None else None, token=token, timeout=60, base_url=endpoint)
             def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
                 stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
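The switch above keys off `SPACE_ID`, an environment variable Hugging Face injects into every running Space. A minimal standalone sketch of the same detect-then-override pattern (the `cfg` lookup is an assumption about how this app parses `HIVE_USE_HF_INFERENCE`):

```python
import os

def resolve_use_remote(cfg: dict) -> bool:
    """Prefer remote inference on HF Spaces, local everywhere else,
    unless HIVE_USE_HF_INFERENCE is set explicitly."""
    default = "SPACE_ID" in os.environ  # present only inside a Space
    if "HIVE_USE_HF_INFERENCE" in os.environ:
        # Assumes the config layer has already coerced the value to a bool.
        return bool(cfg["HIVE_USE_HF_INFERENCE"])
    return default
```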
@@ -1242,6 +1252,7 @@ class Hive:
             self.model = None # No local model needed
             self.stopping_criteria = None # Not used with InferenceClient
         else:
+            print("[Hive] Using local LLM for inference.")
             self.tok = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=trust, chat_template=None)
             if self.tok.pad_token is None:
                 self.tok.pad_token = self.tok.eos_token
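The diff context cuts off the body of `_remote_pipe`. A wrapper in that position would plausibly forward to `InferenceClient.text_generation`; the completion below is a sketch under that assumption, not the committed code:

```python
from huggingface_hub import InferenceClient

def make_remote_pipe(client: InferenceClient):
    """Builds a callable matching the signature of a local text pipeline."""
    def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
        stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
        return client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            temperature=temperature,
            stop_sequences=stop,
        )
    return _remote_pipe
```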
@@ -1488,27 +1499,27 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
 
     with gr.Blocks(title="Hive 🐝 Full Merged Optimized") as demo:
         with gr.Row():
-            with gr.Column(scale=3):
+            with gr.Column(scale=4):
                 gr.Markdown(f"## {CFG['AGENT_NAME']} 🐝")
                 core_status = gr.Markdown("⏳ **Initializing Full Hive Core...** You can chat with the Lite model now. Advanced features will be enabled shortly.") # type: ignore
                 chatbot=gr.Chatbot(height=600, type="messages")
-                msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True, show_label=False, container=False)
+                msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True, show_label=False, container=False, scale=4)
 
             with gr.Column(scale=1, min_width=300):
                 with gr.Sidebar():
                     uid_state=gr.State(None); role_state=gr.State("guest"); mode_state=gr.State("user"); phonics_state=gr.State(False)
-                    with gr.Accordion("🧠 Memory Summary", open=False) as memory_accordion:
+                    with gr.Accordion("🧠 Memory Summary", open=False, visible=False) as memory_accordion:
                         summary_output = gr.Markdown("Click the button to generate a summary of your recent conversations.")
                         summary_btn = gr.Button("Show Memory Summary")
 
-                    with gr.Accordion("🎓 Vocabulary Trainer", open=False) as vocab_accordion:
+                    with gr.Accordion("🎓 Vocabulary Trainer", open=False, visible=False) as vocab_accordion:
                         vocab_output = gr.Markdown("Click to get a new vocabulary word from your conversations.")
                         vocab_btn = gr.Button("Get New Word")
 
-                    with gr.Accordion("📊 Progress Board", open=False) as progress_accordion:
+                    with gr.Accordion("📊 Progress Board", open=False, visible=False) as progress_accordion:
                         progress_output = gr.Markdown("Your progress will be shown here.")
 
-                    with gr.Accordion("Settings & Admin Controls", open=False) as settings_accordion:
+                    with gr.Accordion("Settings & Admin Controls", open=True) as settings_accordion:
                         with gr.Tabs():
                             with gr.TabItem("Login & Profile") as login_tab:
                                 with gr.Row():
@@ -1588,37 +1599,42 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
                                 propose_btn=gr.Button("Propose"); test_btn=gr.Button("Test in sandbox"); apply_btn=gr.Button("Apply (policy-checked)", elem_id="apply_btn")
                                 opt_out=gr.JSON(label="Result")
 
-                    with gr.Accordion("Vocal Chat (Hands-Free)", open=False) as vocal_accordion:
+                    with gr.Accordion("Vocal Chat (Hands-Free)", open=False, visible=False) as vocal_accordion:
                         vocal_chat_state = gr.State({"active": False, "user_audio": None, "last_chunk_time": 0})
                         vocal_chat_btn = gr.Button("Start Vocal Chat")
                         vocal_chat_status = gr.Markdown("Status: Inactive")
                         vocal_mic = gr.Audio(sources=["microphone"], streaming=True, visible=False, autoplay=True)
 
-                    with gr.Accordion("Help", open=False) as help_accordion:
+                    with gr.Accordion("Help", open=True) as help_accordion:
                         gr.Markdown(HELP)
 
-    # --- Event Handlers ---
+    # --- Event Handlers ---
 
     def _sanitize_input(text: str) -> str:
        """Removes control characters and leading/trailing whitespace."""
        if not text: return ""
        return "".join(ch for ch in text if unicodedata.category(ch)[0] != "C").strip()
 
-    def talk(m, uid, role, mode, hist): # type: ignore
-        hive_instance = get_hive_instance() # type: ignore
+    def talk(m, uid, role, mode, hist, request: gr.Request): # type: ignore
        eff = role if mode == "admin" else "user"
 
        sanitized_m = _sanitize_input(m)
        if not sanitized_m:
            hist.append([m, "Please provide a message."])
-            yield hist, ""
+            yield hist, gr.Textbox(value="")
            return
 
        hist.append([sanitized_m, ""])
-        yield hist, "" # Clear input box immediately
+        yield hist, gr.Textbox(value="") # Clear input box immediately
 
-        # Let the Hive class handle all logic, including intent detection
+        hive_instance = get_hive_instance() # Get the correct instance for every message
        final_message, intent = hive_instance._prepare_chat_input(sanitized_m, "en", False, None)
+
+        def _prepare_chat_logic(m: str) -> tuple[str, int]:
+            """Prepares logic for chat, including token sizing."""
+            intent = "essay_review" if len(m.split()) > 50 else "tutor"
+            max_tokens = 512 if intent == "essay_review" else 256
+            return intent, max_tokens
        _, max_tokens = _prepare_chat_logic(sanitized_m) # Keep this for token sizing
 
        # --- Streaming Response ---
@@ -1627,7 +1643,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
        for token in hive_instance.chat_stream(full_prompt, max_new_tokens=max_tokens, temperature=0.7):
            full_reply += token
            hist[-1][1] = full_reply
-            yield hist, ""
+            yield hist, gr.Textbox(value="")
 
        # This block should only run in full mode.
        if not hive_instance.lite_mode:
@@ -1640,8 +1656,8 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
 
        # Final update to history
        hist[-1][1] = hive_instance._postprocess_and_log(full_reply, sanitized_m, eff, uid, intent, [])
-        yield hist, ""
-    msg.submit(talk, [msg, uid_state, role_state, mode_state, chatbot], [chatbot, msg])
+        yield hist, gr.Textbox(value="")
+    msg.submit(talk, [msg, uid_state, role_state, mode_state, chatbot], [chatbot, msg], api_name="chat")
 
    def do_memory_summary(uid):
        hive_instance = get_hive_instance()
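Because `talk` is a generator wired through `msg.submit`, every `yield` pushes an incremental update to the browser, and yielding `gr.Textbox(value="")` instead of a bare string states explicitly which property is being updated. A stripped-down version of the same streaming pattern, with a dummy token source standing in for `hive_instance.chat_stream`:

```python
import gradio as gr

def fake_stream(prompt):
    # Stand-in for hive_instance.chat_stream(...)
    for word in "This reply arrives one token at a time.".split():
        yield word + " "

def talk(message, history):
    history.append([message, ""])
    yield history, gr.Textbox(value="")  # clear the input box immediately
    for token in fake_stream(message):
        history[-1][1] += token
        yield history, gr.Textbox(value="")  # re-render after each token

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(show_label=False)
    msg.submit(talk, [msg, chatbot], [chatbot, msg])

demo.launch()
```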
@@ -1681,15 +1697,15 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
        get_hive_instance()
        return (
            "✅ **Full Hive Core is Ready.**",
-            gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True),
-            gr.Accordion(interactive=True), # memory
-            gr.Accordion(interactive=True), # vocab
-            gr.Accordion(interactive=True), # progress
-            gr.Accordion(interactive=True), # settings
-            gr.Accordion(interactive=True), # vocal
-            gr.Accordion(interactive=True), # help
+            gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True, visible=True),
+            gr.Accordion(visible=True), # memory
+            gr.Accordion(visible=True), # vocab
+            gr.Accordion(visible=True), # progress
+            gr.Accordion(visible=True), # settings
+            gr.Accordion(visible=True), # vocal
+            gr.Accordion(visible=True), # help
        )
-    demo.load(wait_for_hive_core, [], [core_status, msg, memory_accordion, vocab_accordion, progress_accordion, settings_accordion, vocal_accordion, help_accordion])
+    demo.load(wait_for_hive_core, [], [core_status, msg, memory_accordion, vocab_accordion, progress_accordion, settings_accordion, vocal_accordion, help_accordion], api_name="wait_for_hive_core")
 
    def do_login(nm,pw,sec):
        ok, info=attempt_login(nm or "", pw or "", sec or None)
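The sidebar accordions now start hidden (`visible=False`) and `demo.load` reveals them: `wait_for_hive_core` blocks inside each new browser session until the full core exists, then returns one update per output component, applied positionally. A minimal sketch of that reveal pattern, using a `threading.Event` in place of `get_hive_instance()`:

```python
import threading
import gradio as gr

core_ready = threading.Event()  # set by the background init when it finishes

def wait_for_core():
    core_ready.wait()  # block this session until the core is up
    return "✅ **Ready.**", gr.Accordion(visible=True)

with gr.Blocks() as demo:
    status = gr.Markdown("⏳ Initializing...")
    with gr.Accordion("Advanced", visible=False) as advanced:
        gr.Markdown("Full-core features appear here.")
    demo.load(wait_for_core, [], [status, advanced])
```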
@@ -1918,16 +1934,16 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
    # This function has a potential issue if get_hive_instance() returns a lite instance.
    def compress_memory(h):
        if h.lite_mode or not hasattr(h, 'store'):
-            return "Memory compression is disabled in Lite Mode."
+            return "Memory compression is not available until the Full Hive Core is ready."
        ok,msg= _archive_memory(str(h.store.dir))
        return msg
    mem_compress_btn.click(lambda: compress_memory(get_hive_instance()), [], [compress_status])
 
    def do_hotpatch(patch_json):
        try: patch=json.loads(patch_json)
-        except Exception: return "Bad JSON." # type: ignore
+        except Exception as e: return f"Invalid JSON: {e}"
        hive_instance = get_hive_instance()
-        if hive_instance.lite_mode or not hasattr(hive_instance, 'overlay'): return "Hotpatching is disabled in Lite Mode."
+        if hive_instance.lite_mode or not hasattr(hive_instance, 'overlay'): return "Hotpatching is not available until the Full Hive Core is ready."
        ok,msg=hive_instance.overlay.patch(patch,get_hive_instance()) # type: ignore
        return msg
    hotpatch_apply.click(do_hotpatch,[hotpatch_patch],[hotpatch_status])
@@ -1970,6 +1986,7 @@ class Bootstrap:
        self.hive_instance: Optional[Hive] = None
        self.hive_lite_instance: Optional[Hive] = None
        self.hive_ready = threading.Event()
+        self.voice_ready = threading.Event()
        self.env: Optional[Dict] = None
        self.app: Optional[gr.Blocks] = None
        self.ui_thread: Optional[threading.Thread] = None
@@ -2011,6 +2028,14 @@ class Bootstrap:
        self.hive_lite_instance = Hive(lite=True)
        print("[Bootstrap] Lite Hive core is ready.")
 
+        # Pre-load voice models in a separate thread
+        def voice_init_task():
+            print("[Bootstrap] Initializing Voice models (ASR/TTS) in background...")
+            get_asr()
+            get_tts(CFG["TTS_LANG"])
+            self.voice_ready.set()
+            print("[Bootstrap] Voice models are ready.")
+
        # Now initialize the full instance in the background. This is the slow part.
        def full_init_task():
            print("[Bootstrap] Initializing Full Hive core in background...")
@@ -2023,6 +2048,7 @@ class Bootstrap:
        self.ui_thread = threading.Thread(target=self.launch, daemon=True)
 
        threading.Thread(target=full_init_task, daemon=True).start()
+        threading.Thread(target=voice_init_task, daemon=True).start()
 
        # Register graceful shutdown handler and launch the UI in the main thread
        import signal