File size: 15,216 Bytes
2e77d9c cd8fe6f 2e77d9c cd8fe6f 2e77d9c cd8fe6f f0fe4b2 cd8fe6f 0682824 f0fe4b2 ff0ec75 0806ad3 cd8fe6f ff0ec75 0806ad3 cd8fe6f 0682824 cd8fe6f f0fe4b2 cd8fe6f 2e77d9c ff0ec75 cd8fe6f ff0ec75 0dc1136 ff0ec75 cd8fe6f ff0ec75 cd8fe6f 0682824 ff0ec75 0682824 ff0ec75 0682824 ff0ec75 7b2e267 ff0ec75 0806ad3 ff0ec75 0806ad3 ff0ec75 0806ad3 ff0ec75 0682824 0806ad3 0682824 0806ad3 0682824 0806ad3 0682824 ff0ec75 0682824 ff0ec75 0682824 cd8fe6f 7b2e267 cd8fe6f ff0ec75 cd8fe6f 0682824 cd8fe6f 7b2e267 cd8fe6f 7b2e267 0682824 cd8fe6f 7b2e267 cd8fe6f 7b2e267 cd8fe6f 2e77d9c cd8fe6f ff0ec75 cd8fe6f 0682824 ff0ec75 0682824 ff0ec75 0682824 0806ad3 ff0ec75 0682824 ff0ec75 bcbd595 cd8fe6f bcbd595 cd8fe6f 0682824 cd8fe6f 264c420 cd8fe6f ff0ec75 cd8fe6f 0682824 cd8fe6f 0682824 0806ad3 0682824 ff0ec75 cd8fe6f 0682824 cd8fe6f 0682824 264c420 0806ad3 ff0ec75 0806ad3 388edc0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 |
import os
import json
import subprocess
import time
import shutil
import ast
import glob
from pathlib import Path
from typing import List, Dict, Any, Optional
from huggingface_hub import HfApi, hf_hub_download, InferenceClient
class RecursiveContextManager:
    """Persistent memory and repository tooling rooted at one repo directory.

    The manager keeps two kinds of memory under ``<repo_path>/memory``:

    * a small JSON "notebook" (list of ``{"timestamp", "content"}`` dicts),
      mirrored to a Hugging Face dataset when ``HF_TOKEN`` is set;
    * conversation vectors held by an optional ``XetVectorStore`` driver
      (loaded from ``<repo_path>/xet_storage.py`` when that file exists) and
      periodically backed up to the same dataset as one JSON file.

    It also exposes simple file, shell, search and git helpers that operate
    relative to ``repo_path``. Most public helpers are best-effort: they
    return an error string (or an empty result) instead of raising.
    """

    # Embedding model and the length of the zero-vector fallback.
    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
    EMBEDDING_DIM = 384
    # The notebook keeps only the most recent MAX_NOTES entries.
    MAX_NOTES = 50
    # list_files() never returns more than this many paths.
    MAX_LISTED_FILES = 50

    def __init__(self, repo_path: str):
        """Set up paths, credentials and the optional Xet store, then restore memory.

        Args:
            repo_path: Directory all relative paths and commands are resolved against.
        """
        self.repo_path = Path(repo_path)
        self.memory_path = self.repo_path / "memory"
        self.notebook_file = self.memory_path / "notebook.json"

        # --- AUTHENTICATION ---
        self.token = os.getenv("HF_TOKEN")
        self.dataset_id = os.getenv("DATASET_ID", "Executor-Tyrant-Framework/clawdbot-memory")
        self.client = InferenceClient(token=self.token) if self.token else None

        # --- XET / DATABASE INIT ---
        self.xet_root = self.repo_path / "xet_data"
        self.xet_dataset_file = "xet_vectors.json"
        self.xet_store = None

        # DEBOUNCE CONTROLS
        self._saves_since_xet_backup = 0
        self.XET_BACKUP_EVERY_N = 5  # Backup every 5 conversations

        try:
            if (self.repo_path / "xet_storage.py").exists():
                import sys

                repo_dir = str(self.repo_path)
                # Avoid growing sys.path on every instantiation.
                if repo_dir not in sys.path:
                    sys.path.append(repo_dir)
                from xet_storage import XetVectorStore

                self.xet_store = XetVectorStore(repo_path=str(self.xet_root))
                print("✅ Xet Storage Driver Loaded.")
        except Exception as e:
            # The driver is optional; the manager keeps working without it.
            print(f"⚠️ Xet Driver not loaded: {e}")

        # --- RESTORE MEMORY ---
        self._init_memory()
        self._init_xet_memory()

    # =========================================================================
    # 🧠 SYNC LOGIC (Notebook + Xet JSON)
    # =========================================================================
    def _init_memory(self):
        """STARTUP: create the memory directory and download the notebook.

        Without a token nothing is downloaded. If the download fails, an empty
        notebook is created only when no local notebook exists, so a transient
        network error never wipes notes already on disk.
        """
        self.memory_path.mkdir(parents=True, exist_ok=True)
        if not self.token:
            return
        try:
            hf_hub_download(
                repo_id=self.dataset_id,
                filename="notebook.json",
                repo_type="dataset",
                token=self.token,
                local_dir=self.memory_path,
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            print(f"⚠️ Notebook restore failed (New dataset?): {e}")
            if not self.notebook_file.exists():
                self._save_local([])

    def _init_xet_memory(self):
        """STARTUP: download the Xet vector backup (JSON) and load it into the store.

        Does nothing without a token or a loaded Xet store. Failures are
        reported and otherwise ignored.
        """
        if not self.token or not self.xet_store:
            return
        try:
            local_path = hf_hub_download(
                repo_id=self.dataset_id,
                filename=self.xet_dataset_file,
                repo_type="dataset",
                token=self.token,
                local_dir=self.memory_path,
                local_dir_use_symlinks=False,
            )
            # Restore to Xet Store
            vectors = json.loads(Path(local_path).read_text(encoding="utf-8"))
            for v in vectors:
                self.xet_store.store_vector(v["id"], v["vector"], v["metadata"])
            print(f"🧠 Restored {len(vectors)} vectors from Dataset")
        except Exception as e:
            print(f"⚠️ Xet restore failed (New dataset?): {e}")

    def _backup_xet_to_dataset(self):
        """Sync only NEW vectors since the last backup (incremental).

        A local manifest (``xet_manifest.json``) records the vector file names
        already uploaded. New vector files are merged, de-duplicated by
        ``id``, into the dataset copy of ``xet_vectors.json`` and re-uploaded.
        Any failure is reported and leaves the manifest untouched, so the same
        vectors are retried on the next backup.
        """
        if not self.token or not self.xet_store:
            return

        # Track what we've already backed up via manifest
        manifest_path = self.memory_path / "xet_manifest.json"
        try:
            known_hashes = set(json.loads(manifest_path.read_text(encoding="utf-8")))
        except (OSError, ValueError, TypeError):
            # Missing or corrupt manifest: treat everything as new.
            known_hashes = set()

        # Find new vectors (filename IS the content hash in Xet storage)
        new_vectors = []
        current_hashes = set()
        for f in self.xet_store.vectors_path.glob("*/*/*"):
            if not f.is_file():
                continue
            file_hash = f.name
            if file_hash in known_hashes:
                current_hashes.add(file_hash)
                continue
            try:
                new_vectors.append(json.loads(f.read_text(encoding="utf-8")))
            except (OSError, ValueError) as e:
                # Not recorded in the manifest, so it is retried next time.
                print(f"⚠️ Skipping unreadable vector {file_hash}: {e}")
                continue
            current_hashes.add(file_hash)

        if not new_vectors:
            # Nothing new to sync
            return

        try:
            from huggingface_hub.utils import EntryNotFoundError

            # Download existing vectors from dataset to merge
            try:
                local_path = hf_hub_download(
                    repo_id=self.dataset_id,
                    filename=self.xet_dataset_file,
                    repo_type="dataset",
                    token=self.token,
                    local_dir=self.memory_path,
                    local_dir_use_symlinks=False,
                )
                existing = json.loads(Path(local_path).read_text(encoding="utf-8"))
            except EntryNotFoundError:
                # File doesn't exist yet. Any OTHER failure (network, auth...)
                # propagates and aborts the backup, so a partial list is never
                # uploaded over the remote copy.
                existing = []

            # Merge with deduplication by id
            existing_ids = {v["id"] for v in existing}
            for v in new_vectors:
                if v["id"] not in existing_ids:
                    existing.append(v)
                    existing_ids.add(v["id"])

            # Upload merged file
            backup_path = self.memory_path / self.xet_dataset_file
            backup_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
            api = HfApi(token=self.token)
            api.upload_file(
                path_or_fileobj=backup_path,
                path_in_repo=self.xet_dataset_file,
                repo_id=self.dataset_id,
                repo_type="dataset",
                commit_message=f"🧠 Xet: +{len(new_vectors)} vectors (total: {len(existing)})",
            )

            # Update manifest so we don't re-upload these
            manifest_path.write_text(json.dumps(sorted(current_hashes)), encoding="utf-8")
            print(f"☁️ Backed up {len(new_vectors)} new vectors")
        except Exception as e:
            print(f"⚠️ Xet backup failed: {e}")

    # =========================================================================
    # 🧬 EMBEDDINGS
    # =========================================================================
    def _get_embedding(self, text: str) -> List[float]:
        """Return an embedding for ``text`` as a flat list.

        Falls back to a zero vector of ``EMBEDDING_DIM`` entries when no
        inference client is configured, the call fails, or the response is
        empty. Array-like responses (anything exposing ``tolist()``) are
        converted to plain lists, and a nested ``[[...]]`` response is
        unwrapped to its first row.
        """
        zero_vector = [0.0] * self.EMBEDDING_DIM
        if not self.client:
            return zero_vector
        try:
            response = self.client.feature_extraction(text, model=self.EMBEDDING_MODEL)
            if hasattr(response, "tolist"):
                response = response.tolist()
            if response and isinstance(response[0], list):
                response = response[0]
            return response if response else zero_vector
        except Exception as e:
            print(f"⚠️ Embedding failed, using zero vector: {e}")
            return zero_vector

    # =========================================================================
    # 📓 NOTEBOOK
    # =========================================================================
    def _save_local(self, notes: List[Dict]):
        """Write ``notes`` to the local notebook file as UTF-8 JSON."""
        self.memory_path.mkdir(parents=True, exist_ok=True)
        self.notebook_file.write_text(json.dumps(notes, indent=2), encoding='utf-8')

    def _save_notebook(self, notes: List[Dict]):
        """Save ``notes`` locally, then upload the notebook when credentials exist.

        Upload errors are reported but never raised; the local copy is kept.
        """
        self._save_local(notes)
        if self.token and self.dataset_id:
            try:
                api = HfApi(token=self.token)
                api.upload_file(
                    path_or_fileobj=self.notebook_file,
                    path_in_repo="notebook.json",
                    repo_id=self.dataset_id,
                    repo_type="dataset",
                    commit_message=f"Notebook Update: {len(notes)}",
                )
            except Exception as e:
                print(f"⚠️ Notebook upload failed: {e}")

    def _load_notebook(self) -> List[Dict]:
        """Return the notes stored on disk, or ``[]`` if missing or unreadable."""
        if not self.notebook_file.exists():
            return []
        try:
            notes = json.loads(self.notebook_file.read_text(encoding='utf-8'))
        except (OSError, ValueError):
            return []
        # Guard against a file holding something other than a list of notes.
        return notes if isinstance(notes, list) else []

    def notebook_read(self) -> str:
        """Return all notes as ``[index] timestamp: content`` lines."""
        notes = self._load_notebook()
        if not notes:
            return "Notebook is empty."
        return "\n".join(
            f"[{i}] {n.get('timestamp','')}: {n.get('content','')}" for i, n in enumerate(notes)
        )

    def notebook_add(self, content: str) -> str:
        """Append a timestamped note, keep the newest ``MAX_NOTES``, and save."""
        notes = self._load_notebook()
        notes.append({"timestamp": time.strftime("%Y-%m-%d %H:%M"), "content": content})
        if len(notes) > self.MAX_NOTES:
            notes = notes[-self.MAX_NOTES:]
        self._save_notebook(notes)
        return f"✅ Note added & synced. ({len(notes)} items)"

    def notebook_delete(self, index: int) -> str:
        """Delete the note at ``index`` (as shown by ``notebook_read``).

        Returns an error string when the index is out of range or cannot be
        converted to an integer.
        """
        notes = self._load_notebook()
        try:
            removed = notes.pop(int(index))
        except (IndexError, ValueError, TypeError):
            return "❌ Invalid index."
        self._save_notebook(notes)
        return f"🗑️ Deleted note: '{removed.get('content', '')[:20]}...'"

    # =========================================================================
    # 🔍 SEARCH & MEMORY
    # =========================================================================
    def save_conversation_turn(self, user_msg, assist_msg, turn_id):
        """Embed one user/assistant exchange and store it in the Xet store.

        Every ``XET_BACKUP_EVERY_N`` stored turns a dataset backup is
        triggered. Does nothing when no Xet store is loaded.
        """
        if not self.xet_store:
            return
        combined = f"USER: {user_msg}\n\nASSISTANT: {assist_msg}"
        vector = self._get_embedding(combined)
        self.xet_store.store_vector(
            id=f"conv_{turn_id}_{int(time.time())}",
            vector=vector,
            metadata={
                "type": "conversation",
                "user": user_msg[:500],
                "assistant": assist_msg[:500],
                "content": combined,
                "timestamp": time.time(),
            },
        )
        # Debounced backup - not every turn
        self._saves_since_xet_backup += 1
        if self._saves_since_xet_backup >= self.XET_BACKUP_EVERY_N:
            self._backup_xet_to_dataset()
            self._saves_since_xet_backup = 0

    def search_conversations(self, query: str, n: int = 5) -> List[Dict]:
        """Return up to ``n`` stored conversations similar to ``query``.

        Each result is a dict with ``content``, ``similarity`` and ``id``.
        Returns ``[]`` when no Xet store is loaded.
        """
        if not self.xet_store:
            return []
        query_vector = self._get_embedding(query)
        results = self.xet_store.similarity_search(query_vector, n)
        # Format strictly for app.py
        return [
            {
                "content": r.get("metadata", {}).get("content", ""),
                "similarity": r.get("similarity", 0),
                "id": r.get("id", ""),
            }
            for r in results
        ]

    def search_code(self, query: str, n: int = 5) -> List[Dict]:
        """Return up to ``n`` ``*.py`` files whose text contains ``query`` (case-sensitive).

        Paths containing ``venv`` are skipped. Each hit is
        ``{"file": <name>, "snippet": <first 300 chars>}``. Unreadable files
        are skipped; any other failure ends the search with the hits so far.
        """
        results = []
        try:
            for f in self.repo_path.rglob("*.py"):
                if "venv" in str(f):
                    continue
                try:
                    txt = f.read_text(errors='ignore')
                except OSError:
                    continue
                if query in txt:
                    results.append({"file": f.name, "snippet": txt[:300]})
                    if n > 0 and len(results) >= n:
                        break
        except Exception as e:
            print(f"⚠️ Code search failed: {e}")
        return results[:n]

    def search_testament(self, query: str, n: int = 5) -> List[Dict]:
        """Return up to ``n`` ``*.md`` files whose text contains ``query`` (case-insensitive).

        Each hit is ``{"file": <name>, "snippet": <first 300 chars>}``.
        Unreadable files are skipped; any other failure ends the search with
        the hits so far.
        """
        results = []
        try:
            needle = query.lower()
            for f in self.repo_path.rglob("*.md"):
                try:
                    txt = f.read_text(errors='ignore')
                except OSError:
                    continue
                if needle in txt.lower():
                    results.append({"file": f.name, "snippet": txt[:300]})
                    if n > 0 and len(results) >= n:
                        break
        except Exception as e:
            print(f"⚠️ Testament search failed: {e}")
        return results[:n]

    # =========================================================================
    # 🛠️ STANDARD TOOLS
    # =========================================================================
    def read_file(self, path: str, start_line: int = None, end_line: int = None) -> str:
        """Return the text of ``path`` (relative to the repo), optionally a line range.

        ``start_line`` and ``end_line`` are used as a Python slice over the
        file's lines (0-based start, exclusive end); either may be omitted.
        On failure the error message is returned as the result string.
        """
        try:
            target = self.repo_path / path
            lines = target.read_text(encoding='utf-8', errors='ignore').splitlines()
            if start_line is not None or end_line is not None:
                lines = lines[start_line:end_line]
            return "\n".join(lines)
        except Exception as e:
            return str(e)

    def list_files(self, path: str = ".", max_depth: int = 3) -> str:
        """List up to ``MAX_LISTED_FILES`` files under ``path``, one repo-relative path per line.

        Files deeper than ``max_depth`` path components below ``path`` are
        omitted (``None`` disables the limit), as are files with any
        dot-prefixed component inside the repo. Output is sorted.
        """
        try:
            target = self.repo_path / path
            if not target.exists():
                return "Path not found."
            files = []
            for p in target.rglob("*"):
                if not p.is_file():
                    continue
                if max_depth is not None and len(p.relative_to(target).parts) > max_depth:
                    continue
                rel = p.relative_to(self.repo_path)
                # Judge "hidden" on the repo-relative path only, so a
                # dot-directory above the repo does not hide everything.
                if any(part.startswith(".") for part in rel.parts):
                    continue
                files.append(str(rel))
            files.sort()
            return "\n".join(files[:self.MAX_LISTED_FILES])
        except Exception as e:
            return str(e)

    def write_file(self, path: str, content: str) -> str:
        """Write ``content`` to ``path`` (relative to the repo), creating parent directories."""
        try:
            target = self.repo_path / path
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding='utf-8')
            return f"✅ Written to {path}"
        except Exception as e:
            return str(e)

    def shell_execute(self, command: str) -> str:
        """Run ``command`` through the shell in the repo directory (10 s timeout).

        Returns the captured stdout/stderr, or an error string on failure.
        """
        # SECURITY NOTE(review): `command` runs with shell=True and the
        # substring blocklist below is trivially bypassed. Only expose this to
        # trusted callers or inside a sandbox.
        try:
            if any(x in command for x in ["rm -rf /", ":(){ :|:& };:"]):
                return "❌ Blocked."
            result = subprocess.run(
                command,
                shell=True,
                cwd=str(self.repo_path),
                capture_output=True,
                text=True,
                timeout=10,
            )
            return f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        except Exception as e:
            return f"Error: {e}"

    def map_repository_structure(self) -> str:
        """Walk the repo's ``*.py`` files and summarise how many files and nodes were found.

        A node is a file, function or class. Files that cannot be parsed still
        count as a file node. Paths containing ``venv`` are skipped.
        """
        graph = {"nodes": [], "edges": []}
        try:
            file_count = 0
            for file_path in self.repo_path.rglob('*.py'):
                if 'venv' in str(file_path):
                    continue
                rel_path = str(file_path.relative_to(self.repo_path))
                content = file_path.read_text(errors='ignore')
                file_count += 1
                graph["nodes"].append({"id": rel_path, "type": "file"})
                try:
                    tree = ast.parse(content)
                except (SyntaxError, ValueError):
                    # ValueError: e.g. source containing null bytes.
                    continue
                for node in ast.walk(tree):
                    if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                        node_type = "class" if isinstance(node, ast.ClassDef) else "function"
                        graph["nodes"].append({"id": f"{rel_path}::{node.name}", "type": node_type})
            return f"✅ Map Generated: {file_count} files, {len(graph['nodes'])} nodes."
        except Exception as e:
            return f"❌ Mapping failed: {e}"

    def _run_git(self, *args: str, check: bool = True):
        """Run ``git <args>`` inside the repo directory."""
        return subprocess.run(["git", *args], cwd=str(self.repo_path), check=check)

    def push_to_github(self, message: str) -> str:
        """Stage and commit all changes in the repo with ``message``.

        Despite the name, no ``git push`` is performed: pushing requires a
        remote configured with credentials.
        """
        try:
            self._run_git("config", "user.email", "clawdbot@system.local", check=False)
            self._run_git("config", "user.name", "Clawdbot", check=False)
            self._run_git("add", ".")
            self._run_git("commit", "-m", message)
            # Note: 'git push' requires the token to be in the remote URL or credential helper
            return "✅ Changes committed (Push requires configured remote with token)."
        except Exception as e:
            return f"Git Error: {e}"

    def pull_from_github(self, branch: str) -> str:
        """Pull ``branch`` from the ``origin`` remote."""
        try:
            self._run_git("pull", "origin", branch)
            return f"✅ Pulled {branch}"
        except Exception as e:
            return f"Git Pull Error: {e}"

    def create_shadow_branch(self) -> str:
        """Create and check out a timestamped backup branch (``shadow_<epoch>``)."""
        ts = int(time.time())
        try:
            self._run_git("checkout", "-b", f"shadow_{ts}")
            return f"✅ Created branch shadow_{ts}"
        except Exception as e:
            return f"Error: {e}"

    def get_stats(self) -> Dict:
        """Return ``{"total_files": ..., "conversations": ...}``.

        ``total_files`` counts regular files under the repo; ``conversations``
        counts vector files in the Xet store (0 when unavailable).
        """
        conv_count = 0
        if self.xet_store:
            try:
                # Count files in the vectors/shard/hash structure
                conv_count = sum(
                    1 for f in self.xet_store.vectors_path.glob("*/*/*") if f.is_file()
                )
            except Exception as e:
                print(f"⚠️ Could not count stored vectors: {e}")
        total_files = sum(1 for p in self.repo_path.rglob("*") if p.is_file())
        return {"total_files": total_files, "conversations": conv_count}
|