github-actions[bot] committed on
Commit
90052d3
·
1 Parent(s): 7735526

Auto-sync from demo at Wed Nov 5 07:25:37 UTC 2025

Browse files
graphgen/models/reader/rdf_reader.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List
2
+
3
+ import rdflib
4
+ from rdflib import Literal
5
+ from rdflib.util import guess_format
6
+
7
+ from graphgen.bases.base_reader import BaseReader
8
+
9
+
10
class RDFReader(BaseReader):
    """
    Reader for RDF files that extracts triples and represents them as dictionaries.

    Every distinct subject in the parsed graph becomes one document that
    carries the subject identifier, the concatenation of its literal values
    and a mapping from each predicate to all of its object values.
    """

    def read(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Parse an RDF file and build one document per subject.

        Args:
            file_path: Path of the RDF file to load. The serialization
                format is derived from this path with
                ``rdflib.util.guess_format``.

        Returns:
            The result of ``self.filter`` applied to the collected
            documents. Each document has the keys ``"id"`` (the subject as
            a string), ``self.text_column`` (the subject's literal values
            joined by single spaces) and ``"properties"`` (predicate string
            mapped to the list of object strings).

        Raises:
            ValueError: If the file cannot be parsed, if a subject has no
                non-empty literal value, or if the graph yields no
                subjects at all.
        """
        g = rdflib.Graph()
        fmt = guess_format(file_path)
        try:
            g.parse(file_path, format=fmt)
        except Exception as e:
            # Parsers raise many unrelated exception types; surface them
            # uniformly while keeping the original cause chained.
            raise ValueError(f"Cannot parse RDF file {file_path}: {e}") from e

        docs: List[Dict[str, Any]] = []
        text_col = self.text_column

        # g.subjects() yields a subject once per matching triple, so it must
        # be de-duplicated. dict.fromkeys keeps the first-seen order, whereas
        # a set would order subjects by their (randomized) hash and make the
        # order of the returned documents vary between runs.
        for subj in dict.fromkeys(g.subjects()):
            literals: List[str] = []
            props: Dict[str, List[str]] = {}
            for _, pred, obj in g.triples((subj, None, None)):
                obj_str = str(obj)
                # Only literal objects contribute to the text column;
                # every object (literal or not) is recorded in properties.
                if isinstance(obj, Literal):
                    literals.append(obj_str)
                props.setdefault(str(pred), []).append(obj_str)

            text = " ".join(literals).strip()
            if not text:
                raise ValueError(
                    f"Subject {subj} has no literal values; "
                    f"missing '{text_col}' for text column."
                )

            docs.append({"id": str(subj), text_col: text, "properties": props})

        if not docs:
            raise ValueError("RDF file contains no valid documents.")

        return self.filter(docs)
requirements.txt CHANGED
@@ -24,6 +24,9 @@ leidenalg
24
  igraph
25
  python-louvain
26
 
 
 
 
27
  # Bioinformatics
28
  biopython
29
 
 
24
  igraph
25
  python-louvain
26
 
27
+ # KG
28
+ rdflib
29
+
30
  # Bioinformatics
31
  biopython
32