Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
bda6eda
1
Parent(s):
b9d49bb
Auto-sync from demo at Mon Sep 29 09:57:21 UTC 2025
Browse files
- graphgen/generate.py +8 -8
- graphgen/graphgen.py +7 -2
graphgen/generate.py
CHANGED
|
@@ -16,8 +16,6 @@ load_dotenv()
|
|
| 16 |
|
| 17 |
def set_working_dir(folder):
|
| 18 |
os.makedirs(folder, exist_ok=True)
|
| 19 |
-
os.makedirs(os.path.join(folder, "data", "graphgen"), exist_ok=True)
|
| 20 |
-
os.makedirs(os.path.join(folder, "logs"), exist_ok=True)
|
| 21 |
|
| 22 |
|
| 23 |
def save_config(config_path, global_config):
|
|
@@ -48,17 +46,20 @@ def main():
|
|
| 48 |
args = parser.parse_args()
|
| 49 |
|
| 50 |
working_dir = args.output_dir
|
| 51 |
-
set_working_dir(working_dir)
|
| 52 |
|
| 53 |
with open(args.config_file, "r", encoding="utf-8") as f:
|
| 54 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
| 55 |
|
| 56 |
output_data_type = config["output_data_type"]
|
| 57 |
unique_id = int(time.time())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
set_logger(
|
| 59 |
-
os.path.join(
|
| 60 |
-
working_dir, "logs", f"graphgen_{output_data_type}_{unique_id}.log"
|
| 61 |
-
),
|
| 62 |
if_stream=True,
|
| 63 |
)
|
| 64 |
logger.info(
|
|
@@ -94,8 +95,7 @@ def main():
|
|
| 94 |
else:
|
| 95 |
raise ValueError(f"Unsupported output data type: {output_data_type}")
|
| 96 |
|
| 97 |
-
|
| 98 |
-
save_config(os.path.join(output_path, f"config-{unique_id}.yaml"), config)
|
| 99 |
logger.info("GraphGen completed successfully. Data saved to %s", output_path)
|
| 100 |
|
| 101 |
|
|
|
|
| 16 |
|
| 17 |
def set_working_dir(folder):
|
| 18 |
os.makedirs(folder, exist_ok=True)
|
|
|
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
def save_config(config_path, global_config):
|
|
|
|
| 46 |
args = parser.parse_args()
|
| 47 |
|
| 48 |
working_dir = args.output_dir
|
|
|
|
| 49 |
|
| 50 |
with open(args.config_file, "r", encoding="utf-8") as f:
|
| 51 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
| 52 |
|
| 53 |
output_data_type = config["output_data_type"]
|
| 54 |
unique_id = int(time.time())
|
| 55 |
+
|
| 56 |
+
output_path = os.path.join(
|
| 57 |
+
working_dir, "data", "graphgen", f"{unique_id}_{output_data_type}"
|
| 58 |
+
)
|
| 59 |
+
set_working_dir(output_path)
|
| 60 |
+
|
| 61 |
set_logger(
|
| 62 |
+
os.path.join(output_path, f"{unique_id}.log"),
|
|
|
|
|
|
|
| 63 |
if_stream=True,
|
| 64 |
)
|
| 65 |
logger.info(
|
|
|
|
| 95 |
else:
|
| 96 |
raise ValueError(f"Unsupported output data type: {output_data_type}")
|
| 97 |
|
| 98 |
+
save_config(os.path.join(output_path, "config.yaml"), config)
|
|
|
|
| 99 |
logger.info("GraphGen completed successfully. Data saved to %s", output_path)
|
| 100 |
|
| 101 |
|
graphgen/graphgen.py
CHANGED
|
@@ -102,8 +102,13 @@ class GraphGen:
|
|
| 102 |
self.working_dir, namespace="rephrase"
|
| 103 |
)
|
| 104 |
self.qa_storage: JsonListStorage = JsonListStorage(
|
| 105 |
-
os.path.join(
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
)
|
| 108 |
|
| 109 |
async def async_split_chunks(self, data: List[Union[List, Dict]]) -> dict:
|
|
|
|
| 102 |
self.working_dir, namespace="rephrase"
|
| 103 |
)
|
| 104 |
self.qa_storage: JsonListStorage = JsonListStorage(
|
| 105 |
+
os.path.join(
|
| 106 |
+
self.working_dir,
|
| 107 |
+
"data",
|
| 108 |
+
"graphgen",
|
| 109 |
+
f"{self.unique_id}_{self.config['output_data_type']}",
|
| 110 |
+
),
|
| 111 |
+
namespace="qa",
|
| 112 |
)
|
| 113 |
|
| 114 |
async def async_split_chunks(self, data: List[Union[List, Dict]]) -> dict:
|