# MnemoCore/vector_core/prep_migration.py
# Uploaded by Granis87: "Initial upload of MnemoCore" (commit dbb04e4, verified)
import json
import time
import sys
import os
# Placeholder for FAISS/SentenceTransformer (to be installed)
# This script prepares the data for vectorization
def load_memories(memory_path):
    """Load memory records from a JSON Lines file.

    Blank lines are ignored. Lines that are not valid JSON are skipped
    instead of aborting the whole load; the number of skipped lines is
    reported on stderr so that dropped data does not go unnoticed.

    Args:
        memory_path: Path to a JSONL file holding one JSON document per line.

    Returns:
        A list with the decoded JSON value of every valid line, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    data = []
    skipped = 0
    # Decode as UTF-8 explicitly: without it, open() falls back to the
    # platform locale and non-ASCII memories can be mis-decoded or crash.
    with open(memory_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                # Best-effort load: keep going, but remember what was dropped.
                skipped += 1
    print(f"Loaded {len(data)} memories.")
    if skipped:
        print(f"Skipped {skipped} malformed line(s).", file=sys.stderr)
    return data
def prepare_corpus(memories):
    """Build the parallel id and text lists used for embedding.

    Each memory contributes one text of the form
    ``"<content> <context> <tags joined by spaces>"``. Missing fields are
    treated as empty, so the separating spaces are always present.

    Args:
        memories: Iterable of dict-like records. The keys read are
            ``id``, ``content``, ``context`` and ``tags``.

    Returns:
        A tuple ``(ids, corpus)`` of two lists of equal length, where
        ``ids[i]`` is the ``id`` of the record that produced ``corpus[i]``
        (``None`` when the record has no ``id``).
    """
    ids = []
    corpus = []
    for m in memories:
        content = m.get('content')
        context = m.get('context')
        tags = m.get('tags')

        # An explicit null in the JSON arrives as None; treat it like a
        # missing field rather than embedding the literal text "None".
        if content is None:
            content = ''
        if context is None:
            context = ''

        if tags is None:
            tags = []
        elif isinstance(tags, str):
            # A bare string would otherwise be joined character by character.
            tags = [tags]
        # str() keeps non-string tags (numbers, etc.) from breaking the join.
        tag_text = ' '.join(str(tag) for tag in tags)

        # Combine relevant fields for embedding
        corpus.append(f"{content} {context} {tag_text}")
        ids.append(m.get('id'))
    return ids, corpus
if __name__ == "__main__":
    # Input: JSONL memory store. Output: JSON file consumed by the later
    # vectorization step.
    memory_file = "haim/data/memory.jsonl"
    output_file = "haim/vector_core/corpus_ready.json"

    if not os.path.exists(memory_file):
        print(f"Error: {memory_file} not found.")
        sys.exit(1)

    ids, corpus = prepare_corpus(load_memories(memory_file))
    print(f"Prepared {len(corpus)} text chunks for embedding.")

    # Create the output directory up front so the run does not fail with
    # FileNotFoundError after all the preparation work has been done.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Save prepared corpus for the actual vectorization step
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump({"ids": ids, "corpus": corpus}, f)
    print(f"Corpus saved to {output_file}")