File size: 15,216 Bytes
2e77d9c
cd8fe6f
2e77d9c
cd8fe6f
 
 
 
 
 
 
2e77d9c
 
 
 
cd8fe6f
 
f0fe4b2
cd8fe6f
 
 
0682824
f0fe4b2
ff0ec75
 
0806ad3
cd8fe6f
ff0ec75
0806ad3
 
 
 
cd8fe6f
 
 
 
 
0682824
cd8fe6f
f0fe4b2
cd8fe6f
2e77d9c
ff0ec75
cd8fe6f
ff0ec75
0dc1136
ff0ec75
 
 
cd8fe6f
ff0ec75
cd8fe6f
 
 
 
0682824
 
 
 
 
ff0ec75
 
 
0682824
ff0ec75
 
 
 
 
 
 
 
 
0682824
ff0ec75
7b2e267
ff0ec75
0806ad3
 
 
 
 
 
 
 
 
 
 
 
 
 
ff0ec75
 
0806ad3
 
 
 
 
 
 
 
 
 
ff0ec75
0806ad3
 
 
ff0ec75
0682824
0806ad3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0682824
 
0806ad3
 
 
 
 
0682824
0806ad3
 
 
 
 
0682824
ff0ec75
0682824
 
ff0ec75
 
 
 
 
 
 
 
 
 
 
 
0682824
cd8fe6f
 
 
7b2e267
 
cd8fe6f
 
 
 
 
ff0ec75
 
 
cd8fe6f
0682824
cd8fe6f
 
 
 
 
7b2e267
 
 
cd8fe6f
 
7b2e267
 
 
0682824
cd8fe6f
7b2e267
cd8fe6f
7b2e267
 
 
 
cd8fe6f
 
 
 
2e77d9c
cd8fe6f
ff0ec75
cd8fe6f
0682824
 
ff0ec75
 
0682824
 
ff0ec75
 
 
 
 
 
 
 
 
0682824
0806ad3
 
 
 
 
 
ff0ec75
 
 
 
 
0682824
ff0ec75
 
 
 
 
 
bcbd595
cd8fe6f
 
bcbd595
cd8fe6f
0682824
cd8fe6f
 
 
264c420
cd8fe6f
ff0ec75
cd8fe6f
0682824
 
 
 
 
 
 
 
cd8fe6f
0682824
0806ad3
0682824
ff0ec75
 
 
 
 
 
 
 
 
 
cd8fe6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0682824
 
cd8fe6f
0682824
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264c420
0806ad3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff0ec75
0806ad3
 
 
 
 
 
388edc0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
import os
import json
import subprocess
import time
import shutil
import ast
import glob
from pathlib import Path
from typing import List, Dict, Any, Optional
from huggingface_hub import HfApi, hf_hub_download, InferenceClient

class RecursiveContextManager:
    """Agent tool belt rooted at a repository path.

    Provides three persistence layers plus utilities:
      * a JSON "notebook" of short notes, mirrored to a Hugging Face dataset;
      * an optional Xet vector store (loaded dynamically from ``xet_storage.py``
        in the repo) with debounced incremental backup to the same dataset;
      * file / shell / git helpers and lightweight code search.

    Network features degrade gracefully: without an ``HF_TOKEN`` everything
    stays local and embedding lookups return a zero vector.
    """

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path)
        self.memory_path = self.repo_path / "memory"
        self.notebook_file = self.memory_path / "notebook.json"

        # --- AUTHENTICATION ---
        self.token = os.getenv("HF_TOKEN")
        self.dataset_id = os.getenv("DATASET_ID", "Executor-Tyrant-Framework/clawdbot-memory")
        self.client = InferenceClient(token=self.token) if self.token else None

        # --- XET / DATABASE INIT ---
        self.xet_root = self.repo_path / "xet_data"
        self.xet_dataset_file = "xet_vectors.json"
        self.xet_store = None

        # Debounce controls: avoid a dataset commit on every conversation turn.
        self._saves_since_xet_backup = 0
        self.XET_BACKUP_EVERY_N = 5  # Backup every 5 conversations

        # The Xet driver is optional and lives inside the managed repo itself;
        # import it dynamically so the manager still works without it.
        try:
            if (self.repo_path / "xet_storage.py").exists():
                import sys
                sys.path.append(str(self.repo_path))
                from xet_storage import XetVectorStore
                self.xet_store = XetVectorStore(repo_path=str(self.xet_root))
                print("βœ… Xet Storage Driver Loaded.")
        except Exception as e:
            print(f"⚠️ Xet Driver not loaded: {e}")

        # --- RESTORE MEMORY ---
        self._init_memory()
        self._init_xet_memory()

    # =========================================================================
    # 🧠 SYNC LOGIC (Notebook + Xet JSON)
    # =========================================================================
    def _init_memory(self):
        """STARTUP: Download the notebook from the dataset (or seed an empty one)."""
        self.memory_path.mkdir(parents=True, exist_ok=True)
        if self.token:
            try:
                hf_hub_download(
                    repo_id=self.dataset_id, filename="notebook.json", repo_type="dataset",
                    token=self.token, local_dir=self.memory_path, local_dir_use_symlinks=False
                )
            except Exception:
                # First run / missing file on the dataset: start with an empty notebook.
                self._save_local([])

    def _init_xet_memory(self):
        """STARTUP: Download the Xet vector dump (JSON) and replay it into the store."""
        if not self.token or not self.xet_store:
            return
        try:
            local_path = hf_hub_download(
                repo_id=self.dataset_id, filename=self.xet_dataset_file, repo_type="dataset",
                token=self.token, local_dir=self.memory_path, local_dir_use_symlinks=False
            )
            # Restore every serialized vector into the Xet store.
            vectors = json.loads(Path(local_path).read_text())
            for v in vectors:
                self.xet_store.store_vector(v["id"], v["vector"], v["metadata"])
            print(f"🧠 Restored {len(vectors)} vectors from Dataset")
        except Exception as e:
            print(f"⚠️ Xet restore failed (New dataset?): {e}")

    def _backup_xet_to_dataset(self):
        """Sync only NEW vectors since the last backup (incremental).

        A local manifest of content hashes tracks what has already been pushed;
        new vectors are merged (deduplicated by ``id``) into the remote JSON
        dump and uploaded as a single dataset commit.
        """
        if not self.token or not self.xet_store:
            return

        # Track what we've already backed up via manifest.
        manifest_path = self.memory_path / "xet_manifest.json"
        try:
            known_hashes = set(json.loads(manifest_path.read_text()))
        except Exception:
            known_hashes = set()

        # Find new vectors (filename IS the content hash in Xet storage).
        new_vectors = []
        current_hashes = set()

        for f in self.xet_store.vectors_path.glob("*/*/*"):
            if not f.is_file():
                continue
            file_hash = f.name
            current_hashes.add(file_hash)

            if file_hash not in known_hashes:
                try:
                    new_vectors.append(json.loads(f.read_text()))
                except Exception:
                    # Skip unreadable/corrupt vector files rather than abort the backup.
                    pass

        if not new_vectors:
            # Nothing new to sync.
            return

        try:
            # Download existing vectors from dataset to merge.
            existing = []
            try:
                local_path = hf_hub_download(
                    repo_id=self.dataset_id,
                    filename=self.xet_dataset_file,
                    repo_type="dataset",
                    token=self.token,
                    local_dir=self.memory_path,
                    local_dir_use_symlinks=False
                )
                existing = json.loads(Path(local_path).read_text())
            except Exception:
                pass  # File doesn't exist yet

            # Merge with deduplication by id.
            existing_ids = {v["id"] for v in existing}
            for v in new_vectors:
                if v["id"] not in existing_ids:
                    existing.append(v)

            # Upload merged file.
            backup_path = self.memory_path / self.xet_dataset_file
            backup_path.write_text(json.dumps(existing, indent=2))

            api = HfApi(token=self.token)
            api.upload_file(
                path_or_fileobj=backup_path,
                path_in_repo=self.xet_dataset_file,
                repo_id=self.dataset_id,
                repo_type="dataset",
                commit_message=f"🧠 Xet: +{len(new_vectors)} vectors (total: {len(existing)})"
            )

            # Update manifest so we don't re-upload these.
            manifest_path.write_text(json.dumps(list(current_hashes)))
            print(f"☁️ Backed up {len(new_vectors)} new vectors")

        except Exception as e:
            print(f"⚠️ Xet backup failed: {e}")

    # =========================================================================
    # 🧬 EMBEDDINGS
    # =========================================================================
    def _get_embedding(self, text: str) -> List[float]:
        """Embed *text* via the HF inference API; zero vector when offline/failed."""
        if not self.client:
            return [0.0] * 384  # 384 = all-MiniLM-L6-v2 embedding width
        try:
            # feature_extraction may return a numpy array or nested lists;
            # normalize to plain Python lists to honor the declared return type.
            response = self.client.feature_extraction(text, model="sentence-transformers/all-MiniLM-L6-v2")
            if hasattr(response, "tolist"):
                response = response.tolist()
            return response[0] if isinstance(response[0], list) else response
        except Exception:
            return [0.0] * 384

    # =========================================================================
    # πŸ““ NOTEBOOK
    # =========================================================================
    def _save_local(self, notes: List[Dict]):
        """Write the notebook JSON to disk (creates the memory dir if needed)."""
        self.memory_path.mkdir(parents=True, exist_ok=True)
        self.notebook_file.write_text(json.dumps(notes, indent=2), encoding='utf-8')

    def _save_notebook(self, notes: List[Dict]):
        """Persist locally, then best-effort mirror to the HF dataset."""
        self._save_local(notes)
        if self.token and self.dataset_id:
            try:
                api = HfApi(token=self.token)
                api.upload_file(
                    path_or_fileobj=self.notebook_file, path_in_repo="notebook.json",
                    repo_id=self.dataset_id, repo_type="dataset",
                    commit_message=f"Notebook Update: {len(notes)}"
                )
            except Exception:
                pass  # Offline/remote failure must not lose the local save.

    def _load_notebook(self) -> List[Dict]:
        """Read the notebook from disk; empty list on missing/corrupt file."""
        if not self.notebook_file.exists():
            return []
        try:
            return json.loads(self.notebook_file.read_text(encoding='utf-8'))
        except Exception:
            return []

    def notebook_read(self) -> str:
        """Return the notebook as indexed, timestamped lines (for the agent prompt)."""
        notes = self._load_notebook()
        if not notes:
            return "Notebook is empty."
        return "\n".join([f"[{i}] {n.get('timestamp','')}: {n.get('content','')}" for i, n in enumerate(notes)])

    def notebook_add(self, content: str) -> str:
        """Append a timestamped note, keep only the 50 most recent, and sync."""
        notes = self._load_notebook()
        notes.append({"timestamp": time.strftime("%Y-%m-%d %H:%M"), "content": content})
        if len(notes) > 50:
            notes = notes[-50:]
        self._save_notebook(notes)
        return f"βœ… Note added & synced. ({len(notes)} items)"

    def notebook_delete(self, index: int) -> str:
        """Delete the note at *index* (accepts numeric strings); sync on success."""
        notes = self._load_notebook()
        try:
            removed = notes.pop(int(index))
            self._save_notebook(notes)
            return f"πŸ—‘οΈ Deleted note: '{removed.get('content', '')[:20]}...'"
        # ValueError/TypeError: int(index) failed (non-numeric input from the agent).
        except (IndexError, ValueError, TypeError):
            return "❌ Invalid index."

    # =========================================================================
    # πŸ” SEARCH & MEMORY
    # =========================================================================
    def save_conversation_turn(self, user_msg, assist_msg, turn_id):
        """Embed and store one user/assistant exchange; backup every N turns."""
        if not self.xet_store:
            return
        combined = f"USER: {user_msg}\n\nASSISTANT: {assist_msg}"
        vector = self._get_embedding(combined)

        self.xet_store.store_vector(
            id=f"conv_{turn_id}_{int(time.time())}",
            vector=vector,
            metadata={
                "type": "conversation",
                "user": user_msg[:500],       # truncate to keep metadata small
                "assistant": assist_msg[:500],
                "content": combined,
                "timestamp": time.time()
            }
        )

        # Debounced backup - not every turn.
        self._saves_since_xet_backup += 1
        if self._saves_since_xet_backup >= self.XET_BACKUP_EVERY_N:
            self._backup_xet_to_dataset()
            self._saves_since_xet_backup = 0

    def search_conversations(self, query: str, n: int=5) -> List[Dict]:
        """Semantic search over stored conversation turns; shaped for app.py."""
        if not self.xet_store:
            return []
        query_vector = self._get_embedding(query)
        results = self.xet_store.similarity_search(query_vector, n)

        # Format strictly for app.py.
        return [{
            "content": r.get("metadata", {}).get("content", ""),
            "similarity": r.get("similarity", 0),
            "id": r.get("id", "")
        } for r in results]

    def search_code(self, query: str, n: int=5) -> List[Dict]:
        """Substring search across repo *.py files (case-sensitive, skips venv).

        NOTE: only the basename and the first 300 chars of each matching file
        are returned, not the matching region itself.
        """
        results = []
        try:
            for f in self.repo_path.rglob("*.py"):
                if "venv" in str(f):
                    continue
                txt = f.read_text(errors='ignore')
                if query in txt:
                    results.append({"file": f.name, "snippet": txt[:300]})
        except Exception:
            pass
        return results[:n]

    def search_testament(self, query: str, n: int=5) -> List[Dict]:
        """Case-insensitive substring search across repo *.md files."""
        results = []
        try:
            for f in self.repo_path.rglob("*.md"):
                txt = f.read_text(errors='ignore')
                if query.lower() in txt.lower():
                    results.append({"file": f.name, "snippet": txt[:300]})
        except Exception:
            pass
        return results[:n]

    # =========================================================================
    # πŸ› οΈ STANDARD TOOLS
    # =========================================================================
    def read_file(self, path: str, start_line: int = None, end_line: int = None) -> str:
        """Read a repo-relative file; optionally slice to [start_line, end_line).

        Indices are 0-based slice bounds; either may be omitted (open-ended).
        Returns the error text on failure instead of raising.
        """
        try:
            target = self.repo_path / path
            content = target.read_text(encoding='utf-8', errors='ignore')
            lines = content.splitlines()
            if start_line is not None or end_line is not None:
                lines = lines[start_line:end_line]
            return "\n".join(lines)
        except Exception as e:
            return str(e)

    def list_files(self, path: str = ".", max_depth: int = 3) -> str:
        """List up to 50 non-hidden files under *path* (repo-relative).

        NOTE: *max_depth* is currently accepted but not enforced; rglob walks
        the full tree.
        """
        try:
            target = self.repo_path / path
            if not target.exists():
                return "Path not found."
            files = []
            for p in target.rglob("*"):
                # Skip anything inside a dot-directory (.git, .cache, ...).
                if p.is_file() and not any(part.startswith(".") for part in p.parts):
                    files.append(str(p.relative_to(self.repo_path)))
            return "\n".join(files[:50])
        except Exception as e:
            return str(e)

    def write_file(self, path: str, content: str) -> str:
        """Write *content* to a repo-relative file, creating parent dirs."""
        try:
            target = self.repo_path / path
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding='utf-8')
            return f"βœ… Written to {path}"
        except Exception as e:
            return str(e)

    def shell_execute(self, command: str) -> str:
        """Run a shell command in the repo (10s timeout).

        SECURITY NOTE: the substring blocklist below is trivially bypassable;
        it only stops the most obvious destructive one-liners.
        """
        try:
            if any(x in command for x in ["rm -rf /", ":(){ :|:& };:"]):
                return "❌ Blocked."
            result = subprocess.run(command, shell=True, cwd=str(self.repo_path), capture_output=True, text=True, timeout=10)
            return f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        except Exception as e:
            return f"Error: {e}"

    def map_repository_structure(self) -> str:
        """AST-scan repo *.py files into a node graph; return a summary string.

        NOTE: ``graph["edges"]`` is declared but never populated, and the graph
        itself is discarded after counting - only the summary is returned.
        """
        graph = {"nodes": [], "edges": []}
        try:
            file_count = 0
            for file_path in self.repo_path.rglob('*.py'):
                if 'venv' in str(file_path):
                    continue
                rel_path = str(file_path.relative_to(self.repo_path))
                content = file_path.read_text(errors='ignore')
                file_count += 1
                graph["nodes"].append({"id": rel_path, "type": "file"})
                try:
                    tree = ast.parse(content)
                    for node in ast.walk(tree):
                        if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                            node_id = f"{rel_path}::{node.name}"
                            graph["nodes"].append({"id": node_id, "type": "function"})
                except SyntaxError:
                    continue  # Unparseable file still counts as a file node.
            return f"βœ… Map Generated: {file_count} files, {len(graph['nodes'])} nodes."
        except Exception as e:
            return f"❌ Mapping failed: {e}"

    def push_to_github(self, message: str) -> str:
        """Commit the current state of the managed repo (Git).

        Fix: all git commands now run with ``cwd=self.repo_path`` - previously
        they ran in the process CWD, which may not be the managed repo.
        """
        try:
            cwd = str(self.repo_path)
            subprocess.run(["git", "config", "user.email", "clawdbot@system.local"], check=False, cwd=cwd)
            subprocess.run(["git", "config", "user.name", "Clawdbot"], check=False, cwd=cwd)
            subprocess.run(["git", "add", "."], check=True, cwd=cwd)
            subprocess.run(["git", "commit", "-m", message], check=True, cwd=cwd)
            # Note: 'git push' requires the token to be in the remote URL or credential helper
            return "βœ… Changes committed (Push requires configured remote with token)."
        except Exception as e:
            return f"Git Error: {e}"

    def pull_from_github(self, branch: str) -> str:
        """Pull latest *branch* from origin into the managed repo."""
        try:
            subprocess.run(["git", "pull", "origin", branch], check=True, cwd=str(self.repo_path))
            return f"βœ… Pulled {branch}"
        except Exception as e:
            return f"Git Pull Error: {e}"

    def create_shadow_branch(self) -> str:
        """Create a timestamped backup branch in the managed repo."""
        ts = int(time.time())
        try:
            subprocess.run(["git", "checkout", "-b", f"shadow_{ts}"], check=True, cwd=str(self.repo_path))
            return f"βœ… Created branch shadow_{ts}"
        except Exception as e:
            return f"Error: {e}"

    def get_stats(self) -> Dict:
        """Return file count under the repo and the number of stored vectors."""
        conv_count = 0
        if self.xet_store:
            try:
                # Count files in the vectors/shard/hash structure.
                conv_count = len(list(self.xet_store.vectors_path.glob("*/*/*")))
            except Exception:
                pass
        return {"total_files": len(list(self.repo_path.rglob("*"))), "conversations": conv_count}