Update app.py
app.py CHANGED
@@ -1226,11 +1226,21 @@ class Hive:
         if torch and torch.cuda.is_available() and self.device == "cuda":
             kwargs.update(dict(torch_dtype=torch.float16))

-
+        # --- Automatic Inference Mode Switching ---
+        # Default to local inference for Pi/local machines, remote for HF Spaces.
+        # This can be manually overridden by setting HIVE_USE_HF_INFERENCE.
+        is_hf_space = "SPACE_ID" in os.environ
+        use_remote_default = is_hf_space
+
+        # Check for manual override from environment variable
+        if "HIVE_USE_HF_INFERENCE" in os.environ:
+            use_remote = CFG["HIVE_USE_HF_INFERENCE"]
+        else:
+            use_remote = use_remote_default
+
         if use_remote:
-
-            endpoint = CFG["HIVE_HF_ENDPOINT"] or None
-            token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
+            print("[Hive] Using remote Hugging Face Inference endpoint.")
+            from huggingface_hub import InferenceClient; endpoint = CFG["HIVE_HF_ENDPOINT"] or None; token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
             self.client = InferenceClient(model=self.model_id if endpoint is None else None, token=token, timeout=60, base_url=endpoint)
             def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
                 stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
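
The override branch assumes `CFG["HIVE_USE_HF_INFERENCE"]` has already been coerced to a boolean whenever the variable is set. A minimal sketch of that default-plus-override logic, with a hypothetical `env_flag` helper (not part of app.py):

```python
import os

def env_flag(name: str, default: bool) -> bool:
    # Treat common truthy strings as True; anything else as False.
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")

# Hypothetical usage mirroring the hunk above:
use_remote = env_flag("HIVE_USE_HF_INFERENCE", default="SPACE_ID" in os.environ)
```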

@@ -1242,6 +1252,7 @@ class Hive:
             self.model = None  # No local model needed
             self.stopping_criteria = None  # Not used with InferenceClient
         else:
+            print("[Hive] Using local LLM for inference.")
             self.tok = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=trust, chat_template=None)
             if self.tok.pad_token is None:
                 self.tok.pad_token = self.tok.eos_token
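
The first hunk cuts off inside `_remote_pipe` right after the stop sequences are computed. A sketch of how such a wrapper plausibly finishes, assuming it mimics a local `transformers` pipeline's return shape; `make_remote_pipe` is illustrative, and `stop_sequences` is the older `huggingface_hub` keyword (newer releases call it `stop`):

```python
from huggingface_hub import InferenceClient

def make_remote_pipe(client: InferenceClient):
    # Returns a callable with roughly the same shape as a local text-generation pipeline.
    def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
        stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
        text = client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            temperature=temperature,
            stop_sequences=stop,
        )
        return [{"generated_text": text}]
    return _remote_pipe
```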

@@ -1488,27 +1499,27 @@ def launch_ui(bootstrap_instance: "Bootstrap"):

    with gr.Blocks(title="Hive 🐝 Full Merged Optimized") as demo:
        with gr.Row():
-            with gr.Column(scale=
+            with gr.Column(scale=4):
                gr.Markdown(f"## {CFG['AGENT_NAME']} 🐝")
                core_status = gr.Markdown("⏳ **Initializing Full Hive Core...** You can chat with the Lite model now. Advanced features will be enabled shortly.")  # type: ignore
                chatbot=gr.Chatbot(height=600, type="messages")
-                msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True, show_label=False, container=False)
+                msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True, show_label=False, container=False, scale=4)

            with gr.Column(scale=1, min_width=300):
                with gr.Sidebar():
                    uid_state=gr.State(None); role_state=gr.State("guest"); mode_state=gr.State("user"); phonics_state=gr.State(False)
-                    with gr.Accordion("🧠 Memory Summary", open=False) as memory_accordion:
+                    with gr.Accordion("🧠 Memory Summary", open=False, visible=False) as memory_accordion:
                        summary_output = gr.Markdown("Click the button to generate a summary of your recent conversations.")
                        summary_btn = gr.Button("Show Memory Summary")

-                    with gr.Accordion("📚 Vocabulary Trainer", open=False) as vocab_accordion:
+                    with gr.Accordion("📚 Vocabulary Trainer", open=False, visible=False) as vocab_accordion:
                        vocab_output = gr.Markdown("Click to get a new vocabulary word from your conversations.")
                        vocab_btn = gr.Button("Get New Word")

-                    with gr.Accordion("📈 Progress Board", open=False) as progress_accordion:
+                    with gr.Accordion("📈 Progress Board", open=False, visible=False) as progress_accordion:
                        progress_output = gr.Markdown("Your progress will be shown here.")

-                    with gr.Accordion("Settings & Admin Controls", open=
+                    with gr.Accordion("Settings & Admin Controls", open=True) as settings_accordion:
                        with gr.Tabs():
                            with gr.TabItem("Login & Profile") as login_tab:
                                with gr.Row():

@@ -1588,37 +1599,42 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
                            propose_btn=gr.Button("Propose"); test_btn=gr.Button("Test in sandbox"); apply_btn=gr.Button("Apply (policy-checked)", elem_id="apply_btn")
                            opt_out=gr.JSON(label="Result")

-                    with gr.Accordion("Vocal Chat (Hands-Free)", open=False) as vocal_accordion:
+                    with gr.Accordion("Vocal Chat (Hands-Free)", open=False, visible=False) as vocal_accordion:
                        vocal_chat_state = gr.State({"active": False, "user_audio": None, "last_chunk_time": 0})
                        vocal_chat_btn = gr.Button("Start Vocal Chat")
                        vocal_chat_status = gr.Markdown("Status: Inactive")
                        vocal_mic = gr.Audio(sources=["microphone"], streaming=True, visible=False, autoplay=True)

-                    with gr.Accordion("Help", open=
+                    with gr.Accordion("Help", open=True) as help_accordion:
                        gr.Markdown(HELP)

-        # --- Event Handlers ---
+        # --- Event Handlers ---

        def _sanitize_input(text: str) -> str:
            """Removes control characters and leading/trailing whitespace."""
            if not text: return ""
            return "".join(ch for ch in text if unicodedata.category(ch)[0] != "C").strip()

-        def talk(m, uid, role, mode, hist): # type: ignore
-            hive_instance = get_hive_instance() # type: ignore
+        def talk(m, uid, role, mode, hist, request: gr.Request): # type: ignore
            eff = role if mode == "admin" else "user"

            sanitized_m = _sanitize_input(m)
            if not sanitized_m:
                hist.append([m, "Please provide a message."])
-                yield hist, ""
+                yield hist, gr.Textbox(value="")
                return

            hist.append([sanitized_m, ""])
-            yield hist, "" # Clear input box immediately
+            yield hist, gr.Textbox(value="") # Clear input box immediately

-
+            hive_instance = get_hive_instance() # Get the correct instance for every message
            final_message, intent = hive_instance._prepare_chat_input(sanitized_m, "en", False, None)
+
+            def _prepare_chat_logic(m: str) -> tuple[str, int]:
+                """Prepares logic for chat, including token sizing."""
+                intent = "essay_review" if len(m.split()) > 50 else "tutor"
+                max_tokens = 512 if intent == "essay_review" else 256
+                return intent, max_tokens
            _, max_tokens = _prepare_chat_logic(sanitized_m) # Keep this for token sizing

            # --- Streaming Response ---
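
`talk` is a generator, so every `yield` pushes a fresh `(chatbot, textbox)` update to the browser, and returning `gr.Textbox(value="")` patches just the value of the live component. The pattern in isolation, with `stream_reply` and the hard-coded token list as stand-ins for `talk` and `chat_stream`:

```python
import gradio as gr

def stream_reply(message, history):
    # A generator event handler: every yield re-renders the declared outputs.
    history = history + [[message, ""]]
    yield history, gr.Textbox(value="")                # clear the input box right away
    for token in ["Hello", " from", " a", " stream"]:  # stand-in for chat_stream(...)
        history[-1][1] += token
        yield history, gr.Textbox(value="")            # show the partial reply

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    box = gr.Textbox()
    box.submit(stream_reply, [box, chat], [chat, box])

demo.launch()
```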

@@ -1627,7 +1643,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
            for token in hive_instance.chat_stream(full_prompt, max_new_tokens=max_tokens, temperature=0.7):
                full_reply += token
                hist[-1][1] = full_reply
-                yield hist, ""
+                yield hist, gr.Textbox(value="")

            # This block should only run in full mode.
            if not hive_instance.lite_mode:

@@ -1640,8 +1656,8 @@ def launch_ui(bootstrap_instance: "Bootstrap"):

            # Final update to history
            hist[-1][1] = hive_instance._postprocess_and_log(full_reply, sanitized_m, eff, uid, intent, [])
-            yield hist, ""
-        msg.submit(talk, [msg, uid_state, role_state, mode_state, chatbot], [chatbot, msg])
+            yield hist, gr.Textbox(value="")
+        msg.submit(talk, [msg, uid_state, role_state, mode_state, chatbot], [chatbot, msg], api_name="chat")

        def do_memory_summary(uid):
            hive_instance = get_hive_instance()
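
Naming the submit event (`api_name="chat"`) also publishes it as a callable endpoint in the app's generated API docs. A quick way to confirm from a client, assuming the app is served at the default local URL:

```python
from gradio_client import Client

# Placeholder URL for a default local demo.launch().
client = Client("http://localhost:7860/")
client.view_api()  # the talk handler should now be listed under /chat
```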

@@ -1681,15 +1697,15 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
            get_hive_instance()
            return (
                "✅ **Full Hive Core is Ready.**",
-                gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True),
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
-                gr.Accordion(
+                gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True, visible=True),
+                gr.Accordion(visible=True),  # memory
+                gr.Accordion(visible=True),  # vocab
+                gr.Accordion(visible=True),  # progress
+                gr.Accordion(visible=True),  # settings
+                gr.Accordion(visible=True),  # vocal
+                gr.Accordion(visible=True),  # help
            )
-        demo.load(wait_for_hive_core, [], [core_status, msg, memory_accordion, vocab_accordion, progress_accordion, settings_accordion, vocal_accordion, help_accordion])
+        demo.load(wait_for_hive_core, [], [core_status, msg, memory_accordion, vocab_accordion, progress_accordion, settings_accordion, vocal_accordion, help_accordion], api_name="wait_for_hive_core")

        def do_login(nm,pw,sec):
            ok, info=attempt_login(nm or "", pw or "", sec or None)
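
The accordions are now built with `visible=False` and revealed later by `wait_for_hive_core`, which returns partially specified components. The hide-then-reveal pattern reduced to a minimal, runnable sketch:

```python
import gradio as gr

with gr.Blocks() as demo:
    status = gr.Markdown("Loading...")
    with gr.Accordion("Extras", open=False, visible=False) as panel:  # hidden until ready
        gr.Markdown("Advanced controls live here.")

    def reveal():
        # Returning a component with only the changed properties patches the
        # live component in place (gr.update(visible=True) also works).
        return "Ready.", gr.Accordion(visible=True)

    demo.load(reveal, [], [status, panel])

demo.launch()
```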

@@ -1918,16 +1934,16 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
        # This function has a potential issue if get_hive_instance() returns a lite instance.
        def compress_memory(h):
            if h.lite_mode or not hasattr(h, 'store'):
-                return "Memory compression is
+                return "Memory compression is not available until the Full Hive Core is ready."
            ok,msg= _archive_memory(str(h.store.dir))
            return msg
        mem_compress_btn.click(lambda: compress_memory(get_hive_instance()), [], [compress_status])

        def do_hotpatch(patch_json):
            try: patch=json.loads(patch_json)
-            except Exception: return "
+            except Exception as e: return f"Invalid JSON: {e}"
            hive_instance = get_hive_instance()
-            if hive_instance.lite_mode or not hasattr(hive_instance, 'overlay'): return "Hotpatching is
+            if hive_instance.lite_mode or not hasattr(hive_instance, 'overlay'): return "Hotpatching is not available until the Full Hive Core is ready."
            ok,msg=hive_instance.overlay.patch(patch,get_hive_instance()) # type: ignore
            return msg
        hotpatch_apply.click(do_hotpatch,[hotpatch_patch],[hotpatch_status])
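
The tightened `except` now reports what failed to parse instead of swallowing it. The same guard factored into a standalone helper; `parse_patch` is hypothetical, shown only to illustrate the parse-then-apply seam:

```python
import json

def parse_patch(patch_json: str):
    # Returns (patch, error); error is None when parsing succeeds.
    try:
        return json.loads(patch_json), None
    except (json.JSONDecodeError, TypeError) as e:
        return None, f"Invalid JSON: {e}"

patch, err = parse_patch('{"target": "greeting", "value": "hello"}')
assert err is None and patch["target"] == "greeting"
```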

@@ -1970,6 +1986,7 @@ class Bootstrap:
        self.hive_instance: Optional[Hive] = None
        self.hive_lite_instance: Optional[Hive] = None
        self.hive_ready = threading.Event()
+        self.voice_ready = threading.Event()
        self.env: Optional[Dict] = None
        self.app: Optional[gr.Blocks] = None
        self.ui_thread: Optional[threading.Thread] = None

@@ -2011,6 +2028,14 @@ class Bootstrap:
        self.hive_lite_instance = Hive(lite=True)
        print("[Bootstrap] Lite Hive core is ready.")

+        # Pre-load voice models in a separate thread
+        def voice_init_task():
+            print("[Bootstrap] Initializing Voice models (ASR/TTS) in background...")
+            get_asr()
+            get_tts(CFG["TTS_LANG"])
+            self.voice_ready.set()
+            print("[Bootstrap] Voice models are ready.")
+
        # Now initialize the full instance in the background. This is the slow part.
        def full_init_task():
            print("[Bootstrap] Initializing Full Hive core in background...")
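
`voice_ready` mirrors `hive_ready`: each background task flips its own `threading.Event`, so consumers can block or poll without holding a lock. A reduced, runnable sketch of the arrangement (`MiniBootstrap` is illustrative, not the real class):

```python
import threading, time

class MiniBootstrap:
    def __init__(self):
        self.hive_ready = threading.Event()
        self.voice_ready = threading.Event()

    def start(self):
        # Each slow init runs on its own daemon thread and flips its flag when done.
        def full_init_task():
            time.sleep(0.1)            # stand-in for loading the full model
            self.hive_ready.set()

        def voice_init_task():
            time.sleep(0.1)            # stand-in for get_asr() / get_tts(...)
            self.voice_ready.set()

        threading.Thread(target=full_init_task, daemon=True).start()
        threading.Thread(target=voice_init_task, daemon=True).start()

boot = MiniBootstrap()
boot.start()
boot.hive_ready.wait(timeout=5)        # consumers block (or poll is_set()) until ready
print("full core ready:", boot.hive_ready.is_set())
```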

@@ -2023,6 +2048,7 @@ class Bootstrap:
        self.ui_thread = threading.Thread(target=self.launch, daemon=True)

        threading.Thread(target=full_init_task, daemon=True).start()
+        threading.Thread(target=voice_init_task, daemon=True).start()

        # Register graceful shutdown handler and launch the UI in the main thread
        import signal
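
The last hunk ends at the `import signal` that backs the graceful-shutdown comment. What such a handler typically looks like, as a sketch; the actual handler body isn't in this diff, and `graceful_shutdown` is a hypothetical name:

```python
import signal
import sys

def graceful_shutdown(signum, frame):
    # Flush state / stop worker threads here before exiting.
    print(f"Received signal {signum}, shutting down.")
    sys.exit(0)

signal.signal(signal.SIGINT, graceful_shutdown)
signal.signal(signal.SIGTERM, graceful_shutdown)
```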