github-actions[bot] committed on
Commit
6acd2be
·
1 Parent(s): 8ad3d05

Auto-sync from demo at Fri Nov 7 12:11:52 UTC 2025

Browse files
graphgen/graphgen.py CHANGED
@@ -96,7 +96,7 @@ class GraphGen:
96
  """
97
  read files from input sources
98
  """
99
- data = read_files(read_config["input_file"], self.working_dir)
100
  if len(data) == 0:
101
  logger.warning("No data to process")
102
  return
 
96
  """
97
  read files from input sources
98
  """
99
+ data = read_files(**read_config, cache_dir=self.working_dir)
100
  if len(data) == 0:
101
  logger.warning("No data to process")
102
  return
graphgen/operators/read/read_files.py CHANGED
@@ -1,5 +1,5 @@
1
  from pathlib import Path
2
- from typing import Any, Dict, List
3
 
4
  from graphgen.models import (
5
  CSVReader,
@@ -34,26 +34,49 @@ def _build_reader(suffix: str, cache_dir: str | None):
34
  return _MAPPING[suffix]()
35
 
36
 
37
- def read_files(file_path: str, cache_dir: str | None = None) -> list[dict]:
38
- path = Path(file_path).expanduser()
 
 
 
 
39
  if not path.exists():
40
- raise FileNotFoundError(f"input_path not found: {file_path}")
41
 
 
 
 
 
 
 
42
  if path.is_file():
43
- suffix = path.suffix.lstrip(".")
 
 
 
 
 
 
 
 
44
  reader = _build_reader(suffix, cache_dir)
45
  return reader.read(str(path))
46
 
47
- support_suffix = set(_MAPPING.keys())
48
  files_to_read = [
49
  p for p in path.rglob("*") if p.suffix.lstrip(".").lower() in support_suffix
50
  ]
51
- logger.info("Found %d file(s) under folder %s", len(files_to_read), file_path)
 
 
 
 
 
52
 
53
  all_docs: List[Dict[str, Any]] = []
54
  for p in files_to_read:
55
  try:
56
- suffix = p.suffix.lstrip(".")
57
  reader = _build_reader(suffix, cache_dir)
58
  all_docs.extend(reader.read(str(p)))
59
  except Exception as e: # pylint: disable=broad-except
 
1
  from pathlib import Path
2
+ from typing import Any, Dict, List, Optional
3
 
4
  from graphgen.models import (
5
  CSVReader,
 
34
  return _MAPPING[suffix]()
35
 
36
 
37
+ def read_files(
38
+ input_file: str,
39
+ allowed_suffix: Optional[List[str]] = None,
40
+ cache_dir: Optional[str] = None,
41
+ ) -> list[dict]:
42
+ path = Path(input_file).expanduser()
43
  if not path.exists():
44
+ raise FileNotFoundError(f"input_path not found: {input_file}")
45
 
46
+ if allowed_suffix is None:
47
+ support_suffix = set(_MAPPING.keys())
48
+ else:
49
+ support_suffix = {s.lower().lstrip(".") for s in allowed_suffix}
50
+
51
+ # single file
52
  if path.is_file():
53
+ suffix = path.suffix.lstrip(".").lower()
54
+ if suffix not in support_suffix:
55
+ logger.warning(
56
+ "Skip file %s (suffix '%s' not in allowed_suffix %s)",
57
+ path,
58
+ suffix,
59
+ support_suffix,
60
+ )
61
+ return []
62
  reader = _build_reader(suffix, cache_dir)
63
  return reader.read(str(path))
64
 
65
+ # folder
66
  files_to_read = [
67
  p for p in path.rglob("*") if p.suffix.lstrip(".").lower() in support_suffix
68
  ]
69
+ logger.info(
70
+ "Found %d eligible file(s) under folder %s (allowed_suffix=%s)",
71
+ len(files_to_read),
72
+ input_file,
73
+ support_suffix,
74
+ )
75
 
76
  all_docs: List[Dict[str, Any]] = []
77
  for p in files_to_read:
78
  try:
79
+ suffix = p.suffix.lstrip(".").lower()
80
  reader = _build_reader(suffix, cache_dir)
81
  all_docs.extend(reader.read(str(p)))
82
  except Exception as e: # pylint: disable=broad-except