Spaces:
Running
Running
Add Vanna
Browse files- src/vanna.py +76 -132
src/vanna.py
CHANGED
|
@@ -14,8 +14,9 @@ from vanna.core.system_prompt import SystemPromptBuilder
|
|
| 14 |
from vanna.core.registry import ToolSchema
|
| 15 |
from datetime import datetime
|
| 16 |
|
|
|
|
| 17 |
class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
| 18 |
-
"""
|
| 19 |
|
| 20 |
VERSION = "2.2.0"
|
| 21 |
|
|
@@ -33,50 +34,50 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
|
| 33 |
username = getattr(user, "username", user.id)
|
| 34 |
|
| 35 |
# ======================
|
| 36 |
-
# BASE
|
| 37 |
# ======================
|
| 38 |
prompt = f"[System Prompt v{self.VERSION}]\n\n"
|
| 39 |
-
prompt += f"
|
| 40 |
-
prompt += f"Date
|
| 41 |
|
| 42 |
prompt += (
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
)
|
| 47 |
|
| 48 |
# ======================
|
| 49 |
-
# DIRECTIVES
|
| 50 |
# ======================
|
| 51 |
prompt += (
|
| 52 |
-
"\n## Directives
|
| 53 |
-
"-
|
| 54 |
-
"-
|
| 55 |
-
"-
|
| 56 |
-
"-
|
| 57 |
-
"-
|
| 58 |
-
"-
|
| 59 |
-
"-
|
| 60 |
-
"-
|
| 61 |
)
|
| 62 |
|
| 63 |
# ======================
|
| 64 |
-
#
|
| 65 |
# ======================
|
| 66 |
if context and "database_schema" in context:
|
| 67 |
-
prompt += "\n##
|
| 68 |
prompt += context["database_schema"]
|
| 69 |
else:
|
| 70 |
prompt += (
|
| 71 |
-
"\n##
|
| 72 |
-
"Tables
|
| 73 |
"- posts (id, title, source_url, author, published_date, image_url, type, provider_id, created_at, updated_at)\n"
|
| 74 |
"- providers (id, name)\n"
|
| 75 |
"- provider_attributes (id, provider_id, type, name)\n"
|
| 76 |
"- post_provider_attributes (post_id, attribute_id)\n"
|
| 77 |
"- tags (id, name)\n"
|
| 78 |
"- post_tags (post_id, tag_id, weight)\n"
|
| 79 |
-
"\
|
| 80 |
" - posts.provider_id → providers.id\n"
|
| 81 |
" - post_provider_attributes.post_id → posts.id\n"
|
| 82 |
" - post_provider_attributes.attribute_id → provider_attributes.id\n"
|
|
@@ -86,132 +87,90 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
|
| 86 |
)
|
| 87 |
|
| 88 |
# ======================
|
| 89 |
-
#
|
| 90 |
-
# ======================
|
| 91 |
-
prompt += (
|
| 92 |
-
"\n## Informations sémantiques\n"
|
| 93 |
-
"- `posts.title` : titre du contenu (souvent descriptif, peut contenir des mots-clés thématiques).\n"
|
| 94 |
-
"- `posts.source_url` : lien externe vers la ressource ou article.\n"
|
| 95 |
-
"- `posts.author` : nom du journaliste, du média, ou de l’organisation (ex: \"The New York Times\").\n"
|
| 96 |
-
"- `posts.published_date` : date de publication du post.\n"
|
| 97 |
-
"- `posts.type` : type du contenu, de type ENUM ('spotlight', 'resource', 'insight').\n"
|
| 98 |
-
"- `posts.provider_id` : identifiant de la source (provider) ayant publié le contenu.\n"
|
| 99 |
-
"- `providers.name` : nom de l’organisation source (ex: 'Nuanced', 'SND').\n"
|
| 100 |
-
"- `provider_attributes.type` : type d’attribut du provider (ENUM : 'award', 'category').\n"
|
| 101 |
-
"- `provider_attributes.name` : valeur de l’attribut (ex: 'Best Design', 'Investigation').\n"
|
| 102 |
-
"- `tags.name` : thématique ou mot-clé associé au post (ex: '3D', 'AI', 'Investigation').\n"
|
| 103 |
-
"- `post_tags.weight` : poids d’association entre un post et un tag (pertinence).\n"
|
| 104 |
-
)
|
| 105 |
-
|
| 106 |
-
# ======================
|
| 107 |
-
# LOGIQUE SÉMANTIQUE MÉTIER
|
| 108 |
# ======================
|
| 109 |
prompt += (
|
| 110 |
-
"\n##
|
| 111 |
-
"-
|
| 112 |
-
"-
|
| 113 |
-
"-
|
| 114 |
-
"-
|
| 115 |
-
"-
|
| 116 |
-
"-
|
| 117 |
-
"-
|
| 118 |
-
"-
|
| 119 |
-
"- Les requêtes doivent agréger ou joindre les tables en fonction du besoin utilisateur :\n"
|
| 120 |
-
" * Par tag : via `post_tags` et `tags`\n"
|
| 121 |
-
" * Par provider : via `posts.provider_id` → `providers.id`\n"
|
| 122 |
-
" * Par award/category : via `post_provider_attributes` et `provider_attributes`\n"
|
| 123 |
-
"- Si l’utilisateur parle de “posts récents”, filtrer sur `published_date >= CURRENT_DATE - INTERVAL '90 days'`.\n"
|
| 124 |
-
"- Lorsqu'une recherche mentionne un sujet (ex: '3D', 'design', 'AI'), cela correspond à un ou plusieurs `tags.name`.\n"
|
| 125 |
-
"- Lorsqu'une recherche mentionne un auteur ou une organisation, chercher dans `author` et `provider.name`.\n"
|
| 126 |
-
"- Si l'utilisateur mentionne une année (ex: \"en 2021\"), filtrer avec EXTRACT(YEAR FROM published_date) = 2021.\n"
|
| 127 |
-
"- Si l'utilisateur mentionne un mois et une année (ex: \"en mai 2021\"), filtrer avec EXTRACT(MONTH FROM published_date) = 5 ET EXTRACT(YEAR FROM published_date) = 2021.\n"
|
| 128 |
-
"- Si l'utilisateur dit \"récemment\" ou \"dernièrement\", sélectionner les posts des 90 derniers jours.\n"
|
| 129 |
-
"- Si l'utilisateur dit \"cette année\", filtrer avec EXTRACT(YEAR FROM published_date) = EXTRACT(YEAR FROM CURRENT_DATE).\n"
|
| 130 |
-
"- Ne jamais comparer directement published_date à une chaîne comme '2021' ou 'mai 2021'.\n"
|
| 131 |
-
"- Toujours limiter les résultats à 9 lignes maximum pour les requêtes exploratoires.\n"
|
| 132 |
)
|
| 133 |
|
| 134 |
# ======================
|
| 135 |
-
#
|
| 136 |
# ======================
|
| 137 |
prompt += (
|
| 138 |
-
"\n##
|
| 139 |
-
"-
|
| 140 |
-
"-
|
| 141 |
-
"-
|
| 142 |
-
"-
|
| 143 |
-
"-
|
| 144 |
-
"-
|
| 145 |
-
"-
|
|
|
|
|
|
|
| 146 |
)
|
| 147 |
|
| 148 |
# ======================
|
| 149 |
-
#
|
| 150 |
# ======================
|
| 151 |
if tool_schemas:
|
| 152 |
-
prompt += "\n##
|
| 153 |
for tool in tool_schemas:
|
| 154 |
-
prompt += f"- {tool.name}: {getattr(tool, 'description', '
|
| 155 |
-
prompt += f"
|
| 156 |
|
| 157 |
# ======================
|
| 158 |
-
#
|
| 159 |
# ======================
|
| 160 |
tool_names = [t.name for t in tool_schemas]
|
| 161 |
has_search = "search_saved_correct_tool_uses" in tool_names
|
| 162 |
has_save = "save_question_tool_args" in tool_names
|
| 163 |
-
has_text_memory = "save_text_memory" in tool_names
|
| 164 |
-
|
| 165 |
-
if has_search or has_save or has_text_memory:
|
| 166 |
-
prompt += "\n## Système mémoire\n"
|
| 167 |
|
| 168 |
if has_search or has_save:
|
| 169 |
-
prompt += "\n
|
| 170 |
if has_search:
|
| 171 |
-
prompt += "
|
| 172 |
if has_save:
|
| 173 |
-
prompt += "
|
| 174 |
-
|
| 175 |
-
if has_text_memory:
|
| 176 |
-
prompt += "\n• Mémoire textuelle :\n"
|
| 177 |
-
prompt += " - Conservez les schémas, terminologies métier, patterns SQL et préférences utilisateur.\n"
|
| 178 |
|
| 179 |
# ======================
|
| 180 |
-
#
|
| 181 |
# ======================
|
| 182 |
prompt += (
|
| 183 |
-
"\n##
|
| 184 |
-
"
|
| 185 |
-
"Assistant
|
| 186 |
"FROM posts p "
|
| 187 |
"JOIN post_tags pt ON p.id = pt.post_id "
|
| 188 |
"JOIN tags t ON pt.tag_id = t.id "
|
| 189 |
"JOIN providers pr ON p.provider_id = pr.id "
|
| 190 |
-
"WHERE t.name ILIKE '%3D%' AND pr.name != 'SND'
|
| 191 |
"LIMIT 9;\"]\n"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
"Assistant : [call run_sql with \"SELECT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
| 195 |
"FROM posts p "
|
| 196 |
"LEFT JOIN providers pr ON pr.id = p.provider_id "
|
| 197 |
"WHERE LOWER(p.author) LIKE '%new york times%' OR LOWER(pr.name) LIKE '%new york times%' "
|
| 198 |
-
"AND pr.name != 'SND'
|
| 199 |
"LIMIT 9;\"]\n"
|
| 200 |
-
"Résultat : \"id,title,source_url,author,published_date,image_url\"\n"
|
| 201 |
)
|
| 202 |
|
| 203 |
# ======================
|
| 204 |
-
# INSTRUCTIONS
|
| 205 |
# ======================
|
| 206 |
prompt += (
|
| 207 |
-
"\nIMPORTANT
|
| 208 |
-
"-
|
| 209 |
-
"-
|
| 210 |
-
"-
|
| 211 |
-
"-
|
| 212 |
-
"- Ne pas inclure de JSON, d’analyse, ni de messages explicatifs.\n"
|
| 213 |
-
"- Ignorer les itérations supplémentaires ou réflexions internes.\n"
|
| 214 |
-
"- Une fois le résultat obtenu, arrêtez l’exécution du tool.\n"
|
| 215 |
)
|
| 216 |
|
| 217 |
return prompt
|
|
@@ -232,30 +191,22 @@ class VannaComponent:
|
|
| 232 |
hf_provider: str,
|
| 233 |
connection_string: str,
|
| 234 |
):
|
| 235 |
-
# Configure LLM
|
| 236 |
llm = VannaHuggingFaceLlmService(model=hf_model, token=hf_token, provider=hf_provider)
|
| 237 |
|
| 238 |
-
# Configure database tool
|
| 239 |
self.sql_runner = PostgresRunner(connection_string=connection_string)
|
| 240 |
-
db_tool = RunSqlTool(
|
| 241 |
-
sql_runner=self.sql_runner,
|
| 242 |
-
)
|
| 243 |
|
| 244 |
-
# Configure agent memory
|
| 245 |
agent_memory = DemoAgentMemory(max_items=1000)
|
| 246 |
save_memory_tool = SaveQuestionToolArgsTool(agent_memory)
|
| 247 |
search_memory_tool = SearchSavedCorrectToolUsesTool(agent_memory)
|
| 248 |
|
| 249 |
-
# Configure user resolver
|
| 250 |
self.user_resolver = SimpleUserResolver()
|
| 251 |
|
| 252 |
-
# Register tools with access control
|
| 253 |
tools = ToolRegistry()
|
| 254 |
tools.register_local_tool(db_tool, access_groups=['admin', 'user'])
|
| 255 |
tools.register_local_tool(save_memory_tool, access_groups=['admin'])
|
| 256 |
tools.register_local_tool(search_memory_tool, access_groups=['admin', 'user'])
|
| 257 |
|
| 258 |
-
# Create the agent
|
| 259 |
self.agent = Agent(
|
| 260 |
llm_service=llm,
|
| 261 |
tool_registry=tools,
|
|
@@ -266,14 +217,12 @@ class VannaComponent:
|
|
| 266 |
|
| 267 |
async def ask(self, prompt_for_llm: str):
|
| 268 |
ctx = RequestContext()
|
| 269 |
-
|
| 270 |
-
print(f"🙋 Prompt envoyé au LLM : {prompt_for_llm}")
|
| 271 |
|
| 272 |
final_text = ""
|
| 273 |
seen_texts = set()
|
| 274 |
-
|
| 275 |
async for component in self.agent.send_message(request_context=ctx, message=prompt_for_llm):
|
| 276 |
-
# Texte simple produit par l'agent
|
| 277 |
simple = getattr(component, "simple_component", None)
|
| 278 |
text = getattr(simple, "text", "") if simple else ""
|
| 279 |
if text and text not in seen_texts:
|
|
@@ -281,22 +230,18 @@ class VannaComponent:
|
|
| 281 |
final_text += text + "\n"
|
| 282 |
seen_texts.add(text)
|
| 283 |
|
| 284 |
-
# Requête SQL générée (si présente)
|
| 285 |
sql_query = getattr(component, "sql", None)
|
| 286 |
if sql_query:
|
| 287 |
-
print(f"🧾
|
| 288 |
|
| 289 |
-
# Métadonnées et autres infos associées au composant
|
| 290 |
metadata = getattr(component, "metadata", None)
|
| 291 |
if metadata:
|
| 292 |
-
print(f"📋
|
| 293 |
|
| 294 |
-
# Type de composant utile pour debug
|
| 295 |
component_type = getattr(component, "type", None)
|
| 296 |
if component_type:
|
| 297 |
-
print(f"🔖 Type
|
| 298 |
-
|
| 299 |
-
|
| 300 |
match = re.search(r"query_results_[\w-]+\.csv", final_text)
|
| 301 |
if match:
|
| 302 |
filename = match.group(0)
|
|
@@ -304,13 +249,12 @@ class VannaComponent:
|
|
| 304 |
full_path = os.path.join(folder, filename)
|
| 305 |
|
| 306 |
if os.path.exists(full_path):
|
| 307 |
-
print(f"📂
|
| 308 |
with open(full_path, "r", encoding="utf-8") as f:
|
| 309 |
csv_data = f.read().strip()
|
| 310 |
-
print("🤖
|
| 311 |
return csv_data
|
| 312 |
else:
|
| 313 |
-
print(f"⚠️
|
| 314 |
|
| 315 |
return final_text
|
| 316 |
-
|
|
|
|
| 14 |
from vanna.core.registry import ToolSchema
|
| 15 |
from datetime import datetime
|
| 16 |
|
| 17 |
+
|
| 18 |
class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
| 19 |
+
"""Complete system prompt builder for Vanna SQL assistant v2."""
|
| 20 |
|
| 21 |
VERSION = "2.2.0"
|
| 22 |
|
|
|
|
| 34 |
username = getattr(user, "username", user.id)
|
| 35 |
|
| 36 |
# ======================
|
| 37 |
+
# BASE PROMPT
|
| 38 |
# ======================
|
| 39 |
prompt = f"[System Prompt v{self.VERSION}]\n\n"
|
| 40 |
+
prompt += f"You are an expert SQL assistant for the company {self.company_name}.\n"
|
| 41 |
+
prompt += f"Date: {today}\nUser: {username}\nGroups: {', '.join(user.group_memberships)}\n\n"
|
| 42 |
|
| 43 |
prompt += (
|
| 44 |
+
"Your role: generate correct and efficient SQL queries from natural language.\n"
|
| 45 |
+
"You always respond in **raw CSV format**, with no explanation or extra text.\n"
|
| 46 |
+
"You have full access to all tables and relationships described in the schema.\n"
|
| 47 |
)
|
| 48 |
|
| 49 |
# ======================
|
| 50 |
+
# SQL DIRECTIVES
|
| 51 |
# ======================
|
| 52 |
prompt += (
|
| 53 |
+
"\n## SQL Directives\n"
|
| 54 |
+
"- Always use table aliases in JOINs\n"
|
| 55 |
+
"- Never use SELECT *\n"
|
| 56 |
+
"- Prefer window functions over subqueries when possible\n"
|
| 57 |
+
"- Always include a LIMIT for exploratory queries\n"
|
| 58 |
+
"- Exclude posts where provider = 'SND'\n"
|
| 59 |
+
"- Exclude posts where type = 'resource'\n"
|
| 60 |
+
"- Exclude posts where type = 'insight'\n"
|
| 61 |
+
"- Format dates and numbers for readability\n"
|
| 62 |
)
|
| 63 |
|
| 64 |
# ======================
|
| 65 |
+
# DATABASE SCHEMA
|
| 66 |
# ======================
|
| 67 |
if context and "database_schema" in context:
|
| 68 |
+
prompt += "\n## Database Schema\n"
|
| 69 |
prompt += context["database_schema"]
|
| 70 |
else:
|
| 71 |
prompt += (
|
| 72 |
+
"\n## Database Schema\n"
|
| 73 |
+
"Tables:\n"
|
| 74 |
"- posts (id, title, source_url, author, published_date, image_url, type, provider_id, created_at, updated_at)\n"
|
| 75 |
"- providers (id, name)\n"
|
| 76 |
"- provider_attributes (id, provider_id, type, name)\n"
|
| 77 |
"- post_provider_attributes (post_id, attribute_id)\n"
|
| 78 |
"- tags (id, name)\n"
|
| 79 |
"- post_tags (post_id, tag_id, weight)\n"
|
| 80 |
+
"\nRelationships:\n"
|
| 81 |
" - posts.provider_id → providers.id\n"
|
| 82 |
" - post_provider_attributes.post_id → posts.id\n"
|
| 83 |
" - post_provider_attributes.attribute_id → provider_attributes.id\n"
|
|
|
|
| 87 |
)
|
| 88 |
|
| 89 |
# ======================
|
| 90 |
+
# SEMANTIC INFORMATION
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# ======================
|
| 92 |
prompt += (
|
| 93 |
+
"\n## Semantic Information\n"
|
| 94 |
+
"- `posts.title`: title of the content (often descriptive, may contain keywords).\n"
|
| 95 |
+
"- `posts.source_url`: external link to the article or resource.\n"
|
| 96 |
+
"- `posts.author`: author, journalist, or organization name (e.g., 'The New York Times').\n"
|
| 97 |
+
"- `posts.published_date`: publication date.\n"
|
| 98 |
+
"- `posts.type`: content type ENUM ('spotlight', 'resource', 'insight').\n"
|
| 99 |
+
"- `providers.name`: name of the publishing organization (e.g., 'Nuanced', 'SND').\n"
|
| 100 |
+
"- `tags.name`: thematic keyword or topic (e.g., '3D', 'AI', 'Design').\n"
|
| 101 |
+
"- `post_tags.weight`: relevance score between a post and a tag.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
)
|
| 103 |
|
| 104 |
# ======================
|
| 105 |
+
# BUSINESS LOGIC
|
| 106 |
# ======================
|
| 107 |
prompt += (
|
| 108 |
+
"\n## Business Logic\n"
|
| 109 |
+
"- Providers named 'SND' must always be excluded.\n"
|
| 110 |
+
"- A query mentioning an organization (e.g., 'New York Times') should search both `posts.author` and `providers.name`.\n"
|
| 111 |
+
"- By default, only posts with `type = 'spotlight'` are returned.\n"
|
| 112 |
+
"- Posts of type `resource` or `insight` are excluded unless explicitly requested.\n"
|
| 113 |
+
"- Tags link posts to specific themes or disciplines.\n"
|
| 114 |
+
"- A single post may have multiple tags, awards, or categories.\n"
|
| 115 |
+
"- If the user mentions a year (e.g., 'in 2021'), filter with `EXTRACT(YEAR FROM published_date) = 2021`.\n"
|
| 116 |
+
"- If the user says 'recently', filter posts from the last 90 days.\n"
|
| 117 |
+
"- Always limit exploratory results to 9 rows.\n"
|
| 118 |
)
|
| 119 |
|
| 120 |
# ======================
|
| 121 |
+
# AVAILABLE TOOLS
|
| 122 |
# ======================
|
| 123 |
if tool_schemas:
|
| 124 |
+
prompt += "\n## Available Tools\n"
|
| 125 |
for tool in tool_schemas:
|
| 126 |
+
prompt += f"- {tool.name}: {getattr(tool, 'description', 'No description')}\n"
|
| 127 |
+
prompt += f" Parameters: {getattr(tool, 'parameters', 'N/A')}\n"
|
| 128 |
|
| 129 |
# ======================
|
| 130 |
+
# MEMORY SYSTEM
|
| 131 |
# ======================
|
| 132 |
tool_names = [t.name for t in tool_schemas]
|
| 133 |
has_search = "search_saved_correct_tool_uses" in tool_names
|
| 134 |
has_save = "save_question_tool_args" in tool_names
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
if has_search or has_save:
|
| 137 |
+
prompt += "\n## Memory System\n"
|
| 138 |
if has_search:
|
| 139 |
+
prompt += "- Use `search_saved_correct_tool_uses` to detect past patterns.\n"
|
| 140 |
if has_save:
|
| 141 |
+
prompt += "- Use `save_question_tool_args` to store successful pairs.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
# ======================
|
| 144 |
+
# EXAMPLES
|
| 145 |
# ======================
|
| 146 |
prompt += (
|
| 147 |
+
"\n## Example Interactions\n"
|
| 148 |
+
"User: 'Show me posts related to 3D'\n"
|
| 149 |
+
"Assistant: [call run_sql with \"SELECT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
| 150 |
"FROM posts p "
|
| 151 |
"JOIN post_tags pt ON p.id = pt.post_id "
|
| 152 |
"JOIN tags t ON pt.tag_id = t.id "
|
| 153 |
"JOIN providers pr ON p.provider_id = pr.id "
|
| 154 |
+
"WHERE t.name ILIKE '%3D%' AND pr.name != 'SND' AND p.type = 'spotlight' "
|
| 155 |
"LIMIT 9;\"]\n"
|
| 156 |
+
"\nUser: 'Show me posts from The New York Times'\n"
|
| 157 |
+
"Assistant: [call run_sql with \"SELECT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
|
|
|
| 158 |
"FROM posts p "
|
| 159 |
"LEFT JOIN providers pr ON pr.id = p.provider_id "
|
| 160 |
"WHERE LOWER(p.author) LIKE '%new york times%' OR LOWER(pr.name) LIKE '%new york times%' "
|
| 161 |
+
"AND pr.name != 'SND' AND p.type = 'spotlight' "
|
| 162 |
"LIMIT 9;\"]\n"
|
|
|
|
| 163 |
)
|
| 164 |
|
| 165 |
# ======================
|
| 166 |
+
# FINAL INSTRUCTIONS
|
| 167 |
# ======================
|
| 168 |
prompt += (
|
| 169 |
+
"\nIMPORTANT:\n"
|
| 170 |
+
"- Always exclude posts with provider = 'SND'.\n"
|
| 171 |
+
"- Always exclude posts with type = 'resource' or 'insight'.\n"
|
| 172 |
+
"- Always return **only the raw CSV result** — no explanations, no JSON, no commentary.\n"
|
| 173 |
+
"- Stop tool execution once the query result is obtained.\n"
|
|
|
|
|
|
|
|
|
|
| 174 |
)
|
| 175 |
|
| 176 |
return prompt
|
|
|
|
| 191 |
hf_provider: str,
|
| 192 |
connection_string: str,
|
| 193 |
):
|
|
|
|
| 194 |
llm = VannaHuggingFaceLlmService(model=hf_model, token=hf_token, provider=hf_provider)
|
| 195 |
|
|
|
|
| 196 |
self.sql_runner = PostgresRunner(connection_string=connection_string)
|
| 197 |
+
db_tool = RunSqlTool(sql_runner=self.sql_runner)
|
|
|
|
|
|
|
| 198 |
|
|
|
|
| 199 |
agent_memory = DemoAgentMemory(max_items=1000)
|
| 200 |
save_memory_tool = SaveQuestionToolArgsTool(agent_memory)
|
| 201 |
search_memory_tool = SearchSavedCorrectToolUsesTool(agent_memory)
|
| 202 |
|
|
|
|
| 203 |
self.user_resolver = SimpleUserResolver()
|
| 204 |
|
|
|
|
| 205 |
tools = ToolRegistry()
|
| 206 |
tools.register_local_tool(db_tool, access_groups=['admin', 'user'])
|
| 207 |
tools.register_local_tool(save_memory_tool, access_groups=['admin'])
|
| 208 |
tools.register_local_tool(search_memory_tool, access_groups=['admin', 'user'])
|
| 209 |
|
|
|
|
| 210 |
self.agent = Agent(
|
| 211 |
llm_service=llm,
|
| 212 |
tool_registry=tools,
|
|
|
|
| 217 |
|
| 218 |
async def ask(self, prompt_for_llm: str):
|
| 219 |
ctx = RequestContext()
|
| 220 |
+
print(f"🙋 Prompt sent to LLM: {prompt_for_llm}")
|
|
|
|
| 221 |
|
| 222 |
final_text = ""
|
| 223 |
seen_texts = set()
|
| 224 |
+
|
| 225 |
async for component in self.agent.send_message(request_context=ctx, message=prompt_for_llm):
|
|
|
|
| 226 |
simple = getattr(component, "simple_component", None)
|
| 227 |
text = getattr(simple, "text", "") if simple else ""
|
| 228 |
if text and text not in seen_texts:
|
|
|
|
| 230 |
final_text += text + "\n"
|
| 231 |
seen_texts.add(text)
|
| 232 |
|
|
|
|
| 233 |
sql_query = getattr(component, "sql", None)
|
| 234 |
if sql_query:
|
| 235 |
+
print(f"🧾 SQL Query Generated: {sql_query}")
|
| 236 |
|
|
|
|
| 237 |
metadata = getattr(component, "metadata", None)
|
| 238 |
if metadata:
|
| 239 |
+
print(f"📋 Metadata: {metadata}")
|
| 240 |
|
|
|
|
| 241 |
component_type = getattr(component, "type", None)
|
| 242 |
if component_type:
|
| 243 |
+
print(f"🔖 Component Type: {component_type}")
|
| 244 |
+
|
|
|
|
| 245 |
match = re.search(r"query_results_[\w-]+\.csv", final_text)
|
| 246 |
if match:
|
| 247 |
filename = match.group(0)
|
|
|
|
| 249 |
full_path = os.path.join(folder, filename)
|
| 250 |
|
| 251 |
if os.path.exists(full_path):
|
| 252 |
+
print(f"📂 Reading result file: {full_path}")
|
| 253 |
with open(full_path, "r", encoding="utf-8") as f:
|
| 254 |
csv_data = f.read().strip()
|
| 255 |
+
print("🤖 Response sent to user (from file):", csv_data[:300])
|
| 256 |
return csv_data
|
| 257 |
else:
|
| 258 |
+
print(f"⚠️ File not found: {full_path}")
|
| 259 |
|
| 260 |
return final_text
|
|
|