Diomedes Git
committed on
Commit
·
e14bba5
1
Parent(s):
1f22a5a
new prompts, readme notes, chat tests etc
Browse files
- README.md +40 -0
- src/prompts/character_prompts.py +5 -0
- tests/integration/test_chat.py +3 -3
- tests/integration/test_deliberation.py +11 -5
README.md
CHANGED
|
@@ -61,6 +61,46 @@ Most AI assistants are stateless. cluas_huginn Council remembers, learns, and bu
|
|
| 61 |
- "Multi-agent MCP research collective"
|
| 62 |
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
## Hackathon Track Entries
|
|
|
|
| 61 |
- "Multi-agent MCP research collective"
|
| 62 |
|
| 63 |
|
| 64 |
+
v2:
|
| 65 |
+
|
| 66 |
+
# Cluas Huginn: Multi-Agent Dialectic System
|
| 67 |
+
|
| 68 |
+
## What It Does
|
| 69 |
+
Four AI agents with distinct roles debate questions using structured dialectic.
|
| 70 |
+
|
| 71 |
+
## Architecture
|
| 72 |
+
- **Corvus**: Academic verifier (literature search)
|
| 73 |
+
- **Raven**: Accountability enforcer (news verification)
|
| 74 |
+
- **Magpie**: Trend explorer (pattern connector)
|
| 75 |
+
- **Crow**: Grounded observer (environmental data)
|
| 76 |
+
|
| 77 |
+
## Key Innovations
|
| 78 |
+
1. Unified inheritance architecture
|
| 79 |
+
2. Shared epistemic principles with character differentiation
|
| 80 |
+
3. Tool-use heuristics per character
|
| 81 |
+
4. Steelmanning and collaborative disagreement built in
|
| 82 |
+
|
| 83 |
+
## Tech Stack
|
| 84 |
+
- Base: Python, Gradio
|
| 85 |
+
- LLMs: Groq/Nebius (Qwen 3)
|
| 86 |
+
- Tools: Academic search, news verification, web exploration
|
| 87 |
+
- Memory: Persistent character memories
|
| 88 |
+
|
| 89 |
+
## What Makes This Different
|
| 90 |
+
- Not just multiple LLMs - distinct epistemic roles
|
| 91 |
+
- Structured dialectic (thesis/antithesis/synthesis)
|
| 92 |
+
- Tool usage guided by character personality
|
| 93 |
+
- Collaborative, not adversarial
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
|
| 104 |
|
| 105 |
|
| 106 |
## Hackathon Track Entries
|
src/prompts/character_prompts.py
CHANGED
|
@@ -51,6 +51,11 @@ CONTRADICTION HANDLING (SHARED):
|
|
| 51 |
- Recognize disagreements between agents
|
| 52 |
- Make disagreements explicit but non-hostile
|
| 53 |
- Say what evidence would resolve the disagreement
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
"""
|
| 55 |
|
| 56 |
|
|
|
|
| 51 |
- Recognize disagreements between agents
|
| 52 |
- Make disagreements explicit but non-hostile
|
| 53 |
- Say what evidence would resolve the disagreement
|
| 54 |
+
|
| 55 |
+
CRITICAL: KEEP RESPONSES TO 2–4 SENTENCES.
|
| 56 |
+
- You're in a group chat with other agents. Be concise.
|
| 57 |
+
- If you have many points, pick the 1–2 most important.
|
| 58 |
+
|
| 59 |
"""
|
| 60 |
|
| 61 |
|
tests/integration/test_chat.py
CHANGED
|
@@ -128,12 +128,12 @@ def test_parse_mentions_function():
|
|
| 128 |
# Test single mention
|
| 129 |
message1 = "@corvus what do you think?"
|
| 130 |
mentions1 = parse_mentions(message1)
|
| 131 |
-
assert mentions1 == ["
|
| 132 |
|
| 133 |
# Test multiple mentions
|
| 134 |
message2 = "@corvus and @raven, what are your opinions?"
|
| 135 |
mentions2 = parse_mentions(message2)
|
| 136 |
-
assert set(mentions2) == {"
|
| 137 |
|
| 138 |
# Test no mentions
|
| 139 |
message3 = "what does everyone think?"
|
|
@@ -143,7 +143,7 @@ def test_parse_mentions_function():
|
|
| 143 |
# Test mention with punctuation
|
| 144 |
message4 = "@corvus, what about tool use? @raven?"
|
| 145 |
mentions4 = parse_mentions(message4)
|
| 146 |
-
assert set(mentions4) == {"
|
| 147 |
|
| 148 |
print("Parse mentions function tests passed")
|
| 149 |
|
|
|
|
| 128 |
# Test single mention
|
| 129 |
message1 = "@corvus what do you think?"
|
| 130 |
mentions1 = parse_mentions(message1)
|
| 131 |
+
assert mentions1 == ["Corvus"], f"Single mention failed: {mentions1}"
|
| 132 |
|
| 133 |
# Test multiple mentions
|
| 134 |
message2 = "@corvus and @raven, what are your opinions?"
|
| 135 |
mentions2 = parse_mentions(message2)
|
| 136 |
+
assert set(mentions2) == {"Corvus", "Raven"}, f"Multiple mentions failed: {mentions2}"
|
| 137 |
|
| 138 |
# Test no mentions
|
| 139 |
message3 = "what does everyone think?"
|
|
|
|
| 143 |
# Test mention with punctuation
|
| 144 |
message4 = "@corvus, what about tool use? @raven?"
|
| 145 |
mentions4 = parse_mentions(message4)
|
| 146 |
+
assert set(mentions4) == {"Corvus", "Raven"}, f"Mentions with punctuation failed: {mentions4}"
|
| 147 |
|
| 148 |
print("Parse mentions function tests passed")
|
| 149 |
|
tests/integration/test_deliberation.py
CHANGED
|
@@ -39,7 +39,8 @@ async def test_summariser_options():
|
|
| 39 |
question = "What is the impact of urbanization on crows?"
|
| 40 |
|
| 41 |
# use a specific character as summariser
|
| 42 |
-
for
|
|
|
|
| 43 |
result = await deliberate(question, summariser=char_name)
|
| 44 |
assert result["final_summary"]["by"] == char_name
|
| 45 |
|
|
@@ -47,9 +48,11 @@ async def test_summariser_options():
|
|
| 47 |
async def test_format_and_structure_options():
|
| 48 |
question = "Can crows understand human gestures?"
|
| 49 |
|
| 50 |
-
# test 'chat' format
|
| 51 |
chat_result = await deliberate(question, format="chat", structure="nested")
|
| 52 |
-
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# test flat structure
|
| 55 |
flat_result = await deliberate(question, format="llm", structure="flat")
|
|
@@ -64,8 +67,11 @@ async def test_random_seed_reproducibility():
|
|
| 64 |
|
| 65 |
# the character order should be identical with the same seed
|
| 66 |
assert r1["character_order"] == r2["character_order"]
|
| 67 |
-
# the final summary should
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
@pytest.mark.asyncio
|
| 71 |
async def test_character_registry_populated():
|
|
|
|
| 39 |
question = "What is the impact of urbanization on crows?"
|
| 40 |
|
| 41 |
# use a specific character as summariser
|
| 42 |
+
for char in CHARACTERS:
|
| 43 |
+
char_name = char.name
|
| 44 |
result = await deliberate(question, summariser=char_name)
|
| 45 |
assert result["final_summary"]["by"] == char_name
|
| 46 |
|
|
|
|
| 48 |
async def test_format_and_structure_options():
|
| 49 |
question = "Can crows understand human gestures?"
|
| 50 |
|
| 51 |
+
# test 'chat' format - now returns HTML string
|
| 52 |
chat_result = await deliberate(question, format="chat", structure="nested")
|
| 53 |
+
history = chat_result["history"]
|
| 54 |
+
assert isinstance(history, str), f"Chat format should return HTML string, got {type(history)}"
|
| 55 |
+
assert "deliberation-container" in history, "Missing HTML container in chat format"
|
| 56 |
|
| 57 |
# test flat structure
|
| 58 |
flat_result = await deliberate(question, format="llm", structure="flat")
|
|
|
|
| 67 |
|
| 68 |
# the character order should be identical with the same seed
|
| 69 |
assert r1["character_order"] == r2["character_order"]
|
| 70 |
+
# the final summary should be very similar (allowing for minor LLM variations)
|
| 71 |
+
summary1 = r1["final_summary"]["content"]
|
| 72 |
+
summary2 = r2["final_summary"]["content"]
|
| 73 |
+
# Check if summaries are substantially similar (first 100 chars should match)
|
| 74 |
+
assert summary1[:100] == summary2[:100], f"Summaries differ too much: {summary1[:100]} vs {summary2[:100]}"
|
| 75 |
|
| 76 |
@pytest.mark.asyncio
|
| 77 |
async def test_character_registry_populated():
|