Spaces:

MCP-1st-Birthday
/

cluas_huginn_by_eightzerofoursix

Running

Diomedes Git commited on 8 days ago

Commit

e14bba5

1 Parent(s): 1f22a5a

new prompts, readme notes, chat tests etc

Files changed (4) hide show

README.md CHANGED Viewed

@@ -61,6 +61,46 @@ Most AI assistants are stateless. cluas_huginn Council remembers, learns, and bu
 - "Multi-agent MCP research collective"
 ## Hackathon Track Entries

 - "Multi-agent MCP research collective"
+v2:
+# Cluas Huginn: Multi-Agent Dialectic System
+## What It Does
+Four AI agents with distinct roles debate questions using structured dialectic.
+## Architecture
+- **Corvus**: Academic verifier (literature search)
+- **Raven**: Accountability enforcer (news verification)
+- **Magpie**: Trend explorer (pattern connector)
+- **Crow**: Grounded observer (environmental data)
+## Key Innovations
+1. Unified inheritance architecture
+2. Shared epistemic principles with character differentiation
+3. Tool-use heuristics per character
+4. Steelmanning and collaborative disagreement built-in
+## Tech Stack
+- Base: Python, Gradio
+- LLMs: Groq/Nebius (Qwen 3)
+- Tools: Academic search, news verification, web exploration
+- Memory: Persistent character memories
+## What Makes This Different
+- Not just multiple LLMs - distinct epistemic roles
+- Structured dialectic (thesis/antithesis/synthesis)
+- Tool usage guided by character personality
+- Collaborative, not adversarial
 ## Hackathon Track Entries

src/prompts/character_prompts.py CHANGED Viewed

@@ -51,6 +51,11 @@ CONTRADICTION HANDLING (SHARED):
 - Recognize disagreements between agents
 - Make disagreements explicit but non-hostile
 - Say what evidence would resolve the disagreement
 """

 - Recognize disagreements between agents
 - Make disagreements explicit but non-hostile
 - Say what evidence would resolve the disagreement
+CRITICAL: KEEP RESPONSES TO 2–4 SENTENCES.
+- You're in a group chat with other agents. Be concise.
+- If you have many points, pick the 1–2 most important.
 """

tests/integration/test_chat.py CHANGED Viewed

@@ -128,12 +128,12 @@ def test_parse_mentions_function():
     # Test single mention
     message1 = "@corvus what do you think?"
     mentions1 = parse_mentions(message1)
-    assert mentions1 == ["corvus"], f"Single mention failed: {mentions1}"
     # Test multiple mentions
     message2 = "@corvus and @raven, what are your opinions?"
     mentions2 = parse_mentions(message2)
-    assert set(mentions2) == {"corvus", "raven"}, f"Multiple mentions failed: {mentions2}"
     # Test no mentions
     message3 = "what does everyone think?"
@@ -143,7 +143,7 @@ def test_parse_mentions_function():
     # Test mention with punctuation
     message4 = "@corvus, what about tool use? @raven?"
     mentions4 = parse_mentions(message4)
-    assert set(mentions4) == {"corvus", "raven"}, f"Mentions with punctuation failed: {mentions4}"
     print("Parse mentions function tests passed")

     # Test single mention
     message1 = "@corvus what do you think?"
     mentions1 = parse_mentions(message1)
+    assert mentions1 == ["Corvus"], f"Single mention failed: {mentions1}"
     # Test multiple mentions
     message2 = "@corvus and @raven, what are your opinions?"
     mentions2 = parse_mentions(message2)
+    assert set(mentions2) == {"Corvus", "Raven"}, f"Multiple mentions failed: {mentions2}"
     # Test no mentions
     message3 = "what does everyone think?"
     # Test mention with punctuation
     message4 = "@corvus, what about tool use? @raven?"
     mentions4 = parse_mentions(message4)
+    assert set(mentions4) == {"Corvus", "Raven"}, f"Mentions with punctuation failed: {mentions4}"
     print("Parse mentions function tests passed")

tests/integration/test_deliberation.py CHANGED Viewed

@@ -39,7 +39,8 @@ async def test_summariser_options():
     question = "What is the impact of urbanization on crows?"
     # use a specific character as summariser
-    for char_name, *_ in CHARACTERS:
         result = await deliberate(question, summariser=char_name)
         assert result["final_summary"]["by"] == char_name
@@ -47,9 +48,11 @@ async def test_summariser_options():
 async def test_format_and_structure_options():
     question = "Can crows understand human gestures?"
-    # test 'chat' format
     chat_result = await deliberate(question, format="chat", structure="nested")
-    assert all("role" in entry and "content" in entry for entry in chat_result["history"])
     # test flat structure
     flat_result = await deliberate(question, format="llm", structure="flat")
@@ -64,8 +67,11 @@ async def test_random_seed_reproducibility():
     # the character order should be identical with the same seed
     assert r1["character_order"] == r2["character_order"]
-    # the final summary should also match
-    assert r1["final_summary"]["content"] == r2["final_summary"]["content"]
 @pytest.mark.asyncio
 async def test_character_registry_populated():

     question = "What is the impact of urbanization on crows?"
     # use a specific character as summariser
+    for char in CHARACTERS:
+        char_name = char.name
         result = await deliberate(question, summariser=char_name)
         assert result["final_summary"]["by"] == char_name
 async def test_format_and_structure_options():
     question = "Can crows understand human gestures?"
+    # test 'chat' format - now returns HTML string
     chat_result = await deliberate(question, format="chat", structure="nested")
+    history = chat_result["history"]
+    assert isinstance(history, str), f"Chat format should return HTML string, got {type(history)}"
+    assert "deliberation-container" in history, "Missing HTML container in chat format"
     # test flat structure
     flat_result = await deliberate(question, format="llm", structure="flat")
     # the character order should be identical with the same seed
     assert r1["character_order"] == r2["character_order"]
+    # the final summary should be very similar (allowing for minor LLM variations)
+    summary1 = r1["final_summary"]["content"]
+    summary2 = r2["final_summary"]["content"]
+    # Check if summaries are substantially similar (first 100 chars should match)
+    assert summary1[:100] == summary2[:100], f"Summaries differ too much: {summary1[:100]} vs {summary2[:100]}"
 @pytest.mark.asyncio
 async def test_character_registry_populated():