github-actions[bot] committed on
Commit
816b4b1
·
1 Parent(s): 65b9482

Auto-sync from demo at Wed Nov 5 04:52:21 UTC 2025

Browse files
graphgen/models/reader/pickle_reader.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from typing import Any, Dict, List
3
+
4
+ from graphgen.bases.base_reader import BaseReader
5
+
6
+
7
class PickleReader(BaseReader):
    """
    Read pickle files, requiring the top-level object to be List[Dict[str, Any]].

    SECURITY: unpickling can execute arbitrary code embedded in the file.
    Only use this reader on pickle files that come from a trusted source.
    """

    def read(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Load the documents stored in a pickle file, validate them and filter them.

        :param file_path: path of the pickle file to read.
        :return: the result of ``self.filter`` applied to the loaded list
            (``filter`` is presumably provided by ``BaseReader`` -- confirm).
        :raises ValueError: if the top-level object is not a list, if any item
            is not a dict, or if an item whose ``"type"`` is ``"text"`` does
            not contain the ``self.text_column`` key.
        """
        with open(file_path, "rb") as f:
            # NOTE(security): pickle.load may run arbitrary code from the file;
            # the caller is responsible for passing trusted paths only.
            data = pickle.load(f)

        if not isinstance(data, list):
            raise ValueError("Pickle file must contain a list of documents.")

        for doc in data:
            if not isinstance(doc, dict):
                raise ValueError("Every item in the list must be a dict.")
            # Only documents explicitly typed as "text" are required to carry
            # the text column; other document types are not checked here.
            if doc.get("type") == "text" and self.text_column not in doc:
                raise ValueError(f"Missing '{self.text_column}' in document: {doc}")

        return self.filter(data)