Spaces:

chenzihong
/

GraphGen

Running

App Files Files Community

github-actions[bot] committed on 17 days ago

Commit

dee1edd

1 Parent(s): b7519b4

Auto-sync from demo at Wed Nov 26 09:31:54 UTC 2025

Browse files

Files changed (12) hide show

app.py +12 -0
graphgen/configs/aggregated_config.yaml +17 -7
graphgen/configs/atomic_config.yaml +15 -5
graphgen/configs/cot_config.yaml +14 -5
graphgen/configs/multi_hop_config.yaml +14 -5
graphgen/configs/schema_guided_extraction_config.yaml +8 -3
graphgen/configs/search_config.yaml +5 -2
graphgen/configs/vqa_config.yaml +14 -5
graphgen/engine.py +28 -24
graphgen/graphgen.py +0 -9
graphgen/operators/extract/extract_info.py +1 -1
webui/app.py +12 -0

app.py CHANGED Viewed

@@ -101,12 +101,15 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
     pipeline = [
         {
             "name": "read",
             "params": {
                 "input_file": params.upload_file,
             },
         },
         {
             "name": "chunk",
             "params": {
                 "chunk_size": params.chunk_size,
                 "chunk_overlap": params.chunk_overlap,
@@ -114,6 +117,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
         },
         {
             "name": "build_kg",
         },
     ]
@@ -121,6 +126,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
         pipeline.append(
             {
                 "name": "quiz_and_judge",
                 "params": {"quiz_samples": params.quiz_samples, "re_judge": True},
             }
         )
@@ -128,6 +135,7 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
             {
                 "name": "partition",
                 "deps": ["quiz_and_judge"],
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
@@ -138,6 +146,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
         pipeline.append(
             {
                 "name": "partition",
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
@@ -147,6 +157,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
     pipeline.append(
         {
             "name": "generate",
             "params": {
                 "method": params.mode,
                 "data_format": params.data_format,

     pipeline = [
         {
             "name": "read",
+            "op_key": "read",
             "params": {
                 "input_file": params.upload_file,
             },
         },
         {
             "name": "chunk",
+            "deps": ["read"],
+            "op_key": "chunk",
             "params": {
                 "chunk_size": params.chunk_size,
                 "chunk_overlap": params.chunk_overlap,
         },
         {
             "name": "build_kg",
+            "deps": ["chunk"],
+            "op_key": "build_kg",
         },
     ]
         pipeline.append(
             {
                 "name": "quiz_and_judge",
+                "deps": ["build_kg"],
+                "op_key": "quiz_and_judge",
                 "params": {"quiz_samples": params.quiz_samples, "re_judge": True},
             }
         )
             {
                 "name": "partition",
                 "deps": ["quiz_and_judge"],
+                "op_key": "partition",
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
         pipeline.append(
             {
                 "name": "partition",
+                "deps": ["build_kg"],
+                "op_key": "partition",
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
     pipeline.append(
         {
             "name": "generate",
+            "deps": ["partition"],
+            "op_key": "generate",
             "params": {
                 "method": params.mode,
                 "data_format": params.data_format,

graphgen/configs/aggregated_config.yaml CHANGED Viewed

@@ -1,22 +1,30 @@
 pipeline:
-  - name: read
     params:
       input_file: resources/input_examples/jsonl_demo.jsonl # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
-  - name: chunk
     params:
         chunk_size: 1024 # chunk size for text splitting
         chunk_overlap: 100 # chunk overlap for text splitting
-  - name: build_kg
-  - name: quiz_and_judge
     params:
       quiz_samples: 2 # number of quiz samples to generate
       re_judge: false # whether to re-judge the existing quiz samples
-  - name: partition
-    deps: [quiz_and_judge] # ece depends on quiz_and_judge steps
     params:
       method: ece # ece is a custom partition method based on comprehension loss
       method_params:
@@ -25,7 +33,9 @@ pipeline:
         max_tokens_per_community: 10240 # max tokens per community
         unit_sampling: max_loss # unit sampling strategy, support: random, max_loss, min_loss
-  - name: generate
     params:
       method: aggregated # atomic, aggregated, multi_hop, cot, vqa
       data_format: ChatML # Alpaca, Sharegpt, ChatML

 pipeline:
+  - name: read_step # step name is unique in the pipeline, and can be referenced by other steps
+    op_key: read
     params:
       input_file: resources/input_examples/jsonl_demo.jsonl # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
+  - name: chunk_step
+    op_key: chunk
+    deps: [read_step] # chunk_step depends on read_step
     params:
         chunk_size: 1024 # chunk size for text splitting
         chunk_overlap: 100 # chunk overlap for text splitting
+  - name: build_kg_step
+    op_key: build_kg
+    deps: [chunk_step] # build_kg_step depends on chunk_step
+  - name: quiz_and_judge_step
+    op_key: quiz_and_judge
+    deps: [build_kg_step] # quiz_and_judge_step depends on build_kg_step
     params:
       quiz_samples: 2 # number of quiz samples to generate
       re_judge: false # whether to re-judge the existing quiz samples
+  - name: partition_step
+    op_key: partition
+    deps: [quiz_and_judge_step] # partition_step depends on quiz_and_judge_step
     params:
       method: ece # ece is a custom partition method based on comprehension loss
       method_params:
         max_tokens_per_community: 10240 # max tokens per community
         unit_sampling: max_loss # unit sampling strategy, support: random, max_loss, min_loss
+  - name: generate_step
+    op_key: generate
+    deps: [partition_step] # generate_step depends on partition_step
     params:
       method: aggregated # atomic, aggregated, multi_hop, cot, vqa
       data_format: ChatML # Alpaca, Sharegpt, ChatML

graphgen/configs/atomic_config.yaml CHANGED Viewed

@@ -1,21 +1,31 @@
 pipeline:
-  - name: read
     params:
       input_file: resources/input_examples/json_demo.json # input file path, support json, jsonl, txt, csv, pdf. See resources/input_examples for examples
-  - name: chunk
     params:
       chunk_size: 1024 # chunk size for text splitting
       chunk_overlap: 100 # chunk overlap for text splitting
-  - name: build_kg
-  - name: partition
     params:
       method: dfs # partition method, support: dfs, bfs, ece, leiden
       method_params:
         max_units_per_community: 1 # atomic partition, one node or edge per community
-  - name: generate
     params:
       method: atomic # atomic, aggregated, multi_hop, cot, vqa
       data_format: Alpaca # Alpaca, Sharegpt, ChatML

 pipeline:
+  - name: read_step
+    op_key: read
     params:
       input_file: resources/input_examples/json_demo.json # input file path, support json, jsonl, txt, csv, pdf. See resources/input_examples for examples
+  - name: chunk_step
+    op_key: chunk
+    deps: [read_step] # chunk_step depends on read_step
     params:
       chunk_size: 1024 # chunk size for text splitting
       chunk_overlap: 100 # chunk overlap for text splitting
+  - name: build_kg_step
+    op_key: build_kg
+    deps: [chunk_step] # build_kg_step depends on chunk_step
+  - name: partition_step
+    op_key: partition
+    deps: [build_kg_step] # partition_step depends on build_kg_step (deps reference step names, not op_key values)
     params:
       method: dfs # partition method, support: dfs, bfs, ece, leiden
       method_params:
         max_units_per_community: 1 # atomic partition, one node or edge per community
+  - name: generate_step
+    op_key: generate
+    deps: [partition_step] # generate_step depends on partition_step
     params:
       method: atomic # atomic, aggregated, multi_hop, cot, vqa
       data_format: Alpaca # Alpaca, Sharegpt, ChatML

graphgen/configs/cot_config.yaml CHANGED Viewed

@@ -1,16 +1,23 @@
 pipeline:
-  - name: read
     params:
       input_file: resources/input_examples/txt_demo.txt  # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
-  - name: chunk
     params:
         chunk_size: 1024 # chunk size for text splitting
         chunk_overlap: 100 # chunk overlap for text splitting
-  - name: build_kg
-  - name: partition
     params:
       method: leiden # leiden is a community detection algorithm
       method_params:
@@ -18,7 +25,9 @@ pipeline:
         use_lcc: false # whether to use the largest connected component
         random_seed: 42 # random seed for partitioning
-  - name: generate
     params:
       method: cot # atomic, aggregated, multi_hop, cot, vqa
       data_format: Sharegpt # Alpaca, Sharegpt, ChatML

 pipeline:
+  - name: read_step
+    op_key: read
     params:
       input_file: resources/input_examples/txt_demo.txt  # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
+  - name: chunk_step
+    op_key: chunk
+    deps: [read_step] # chunk_step depends on read_step
     params:
         chunk_size: 1024 # chunk size for text splitting
         chunk_overlap: 100 # chunk overlap for text splitting
+  - name: build_kg_step
+    op_key: build_kg
+    deps: [chunk_step] # build_kg_step depends on chunk_step
+  - name: partition_step
+    op_key: partition
+    deps: [build_kg_step] # partition_step depends on build_kg_step
     params:
       method: leiden # leiden is a community detection algorithm
       method_params:
         use_lcc: false # whether to use the largest connected component
         random_seed: 42 # random seed for partitioning
+  - name: generate_step
+    op_key: generate
+    deps: [partition_step] # generate_step depends on partition_step
     params:
       method: cot # atomic, aggregated, multi_hop, cot, vqa
       data_format: Sharegpt # Alpaca, Sharegpt, ChatML

graphgen/configs/multi_hop_config.yaml CHANGED Viewed

@@ -1,16 +1,23 @@
 pipeline:
-  - name: read
     params:
       input_file: resources/input_examples/csv_demo.csv # input file path, support json, jsonl, txt, csv, pdf. See resources/input_examples for examples
-  - name: chunk
     params:
       chunk_size: 1024 # chunk size for text splitting
       chunk_overlap: 100 # chunk overlap for text splitting
-  - name: build_kg
-  - name: partition
     params:
       method: ece # ece is a custom partition method based on comprehension loss
       method_params:
@@ -19,7 +26,9 @@ pipeline:
         max_tokens_per_community: 10240 # max tokens per community
         unit_sampling: random # unit sampling strategy, support: random, max_loss, min_loss
-  - name: generate
     params:
       method: multi_hop # atomic, aggregated, multi_hop, cot, vqa
       data_format: ChatML # Alpaca, Sharegpt, ChatML

 pipeline:
+  - name: read_step
+    op_key: read
     params:
       input_file: resources/input_examples/csv_demo.csv # input file path, support json, jsonl, txt, csv, pdf. See resources/input_examples for examples
+  - name: chunk_step
+    op_key: chunk
+    deps: [read_step] # chunk_step depends on read_step
     params:
       chunk_size: 1024 # chunk size for text splitting
       chunk_overlap: 100 # chunk overlap for text splitting
+  - name: build_kg_step
+    op_key: build_kg
+    deps: [chunk_step] # build_kg_step depends on chunk_step
+  - name: partition_step
+    op_key: partition
+    deps: [build_kg_step] # partition_step depends on build_kg_step
     params:
       method: ece # ece is a custom partition method based on comprehension loss
       method_params:
         max_tokens_per_community: 10240 # max tokens per community
         unit_sampling: random # unit sampling strategy, support: random, max_loss, min_loss
+  - name: generate_step
+    op_key: generate
+    deps: [partition_step] # generate_step depends on partition_step
     params:
       method: multi_hop # atomic, aggregated, multi_hop, cot, vqa
       data_format: ChatML # Alpaca, Sharegpt, ChatML

graphgen/configs/schema_guided_extraction_config.yaml CHANGED Viewed

@@ -1,15 +1,20 @@
 pipeline:
-  - name: read
     params:
       input_file: resources/input_examples/extract_demo.txt # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
-  - name: chunk
     params:
       chunk_size: 20480
       chunk_overlap: 2000
       separators: []
-  - name: extract
     params:
       method: schema_guided # extraction method, support: schema_guided
       schema_file: graphgen/templates/extraction/schemas/legal_contract.json # schema file path for schema_guided method

 pipeline:
+  - name: read_step
+    op_key: read
     params:
       input_file: resources/input_examples/extract_demo.txt # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
+  - name: chunk_step
+    op_key: chunk
+    deps: [read_step] # chunk_step depends on read_step
     params:
       chunk_size: 20480
       chunk_overlap: 2000
       separators: []
+  - name: extract_step
+    op_key: extract
+    deps: [chunk_step] # extract_step depends on chunk_step
     params:
       method: schema_guided # extraction method, support: schema_guided
       schema_file: graphgen/templates/extraction/schemas/legal_contract.json # schema file path for schema_guided method

graphgen/configs/search_config.yaml CHANGED Viewed

@@ -1,9 +1,12 @@
 pipeline:
-  - name: read
     params:
       input_file:  resources/input_examples/search_demo.jsonl # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
-  - name: search
     params:
       data_sources: [uniprot] # data source for searcher, support: wikipedia, google, uniprot
       uniprot_params:

 pipeline:
+  - name: read_step
+    op_key: read
     params:
       input_file:  resources/input_examples/search_demo.jsonl # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
+  - name: search_step
+    op_key: search
+    deps: [read_step] # search_step depends on read_step
     params:
       data_sources: [uniprot] # data source for searcher, support: wikipedia, google, uniprot
       uniprot_params:

graphgen/configs/vqa_config.yaml CHANGED Viewed

@@ -1,23 +1,32 @@
 pipeline:
-  - name: read
     params:
       input_file: resources/input_examples/vqa_demo.json # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
-  - name: chunk
     params:
         chunk_size: 1024 # chunk size for text splitting
         chunk_overlap: 100 # chunk overlap for text splitting
-  - name: build_kg
-  - name: partition
     params:
       method: anchor_bfs # partition method
       method_params:
         anchor_type: image # node type to select anchor nodes
         max_units_per_community: 10 # max number of units (nodes or edges) per community
-  - name: generate
     params:
       method: vqa # atomic, aggregated, multi_hop, cot, vqa
       data_format: ChatML # Alpaca, Sharegpt, ChatML

 pipeline:
+  - name: read_step
+    op_key: read
     params:
       input_file: resources/input_examples/vqa_demo.json # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
+  - name: chunk_step
+    op_key: chunk
+    deps: [read_step] # chunk_step depends on read_step
     params:
         chunk_size: 1024 # chunk size for text splitting
         chunk_overlap: 100 # chunk overlap for text splitting
+  - name: build_kg_step
+    op_key: build_kg
+    deps: [chunk_step] # build_kg_step depends on chunk_step
+  - name: partition_step
+    op_key: partition
+    deps: [build_kg_step] # partition_step depends on build_kg_step
     params:
       method: anchor_bfs # partition method
       method_params:
         anchor_type: image # node type to select anchor nodes
         max_units_per_community: 10 # max number of units (nodes or edges) per community
+  - name: generate_step
+    op_key: generate
+    deps: [partition_step] # generate_step depends on partition_step
     params:
       method: vqa # atomic, aggregated, multi_hop, cot, vqa
       data_format: ChatML # Alpaca, Sharegpt, ChatML

graphgen/engine.py CHANGED Viewed

@@ -4,7 +4,6 @@ orchestration engine for GraphGen
 import threading
 import traceback
-from functools import wraps
 from typing import Any, Callable, List
@@ -27,25 +26,12 @@ class OpNode:
         self.name, self.deps, self.func = name, deps, func
-def op(name: str, deps=None):
-    deps = deps or []
-    def decorator(func):
-        @wraps(func)
-        def _wrapper(*args, **kwargs):
-            return func(*args, **kwargs)
-        _wrapper.op_node = OpNode(name, deps, lambda self, ctx: func(self, **ctx))
-        return _wrapper
-    return decorator
 class Engine:
     def __init__(self, max_workers: int = 4):
         self.max_workers = max_workers
     def run(self, ops: List[OpNode], ctx: Context):
         name2op = {operation.name: operation for operation in ops}
         # topological sort
@@ -81,7 +67,7 @@ class Engine:
                     return
                 try:
                     name2op[n].func(name2op[n], ctx)
-                except Exception:  # pylint: disable=broad-except
                     exc[n] = traceback.format_exc()
                 done[n].set()
@@ -96,6 +82,20 @@ class Engine:
                 + "\n".join(f"---- {op} ----\n{tb}" for op, tb in exc.items())
             )
 def collect_ops(config: dict, graph_gen) -> List[OpNode]:
     """
@@ -106,16 +106,20 @@ def collect_ops(config: dict, graph_gen) -> List[OpNode]:
     ops: List[OpNode] = []
     for stage in config["pipeline"]:
         name = stage["name"]
-        method = getattr(graph_gen, name)
-        op_node = method.op_node
-        # if there are runtime dependencies, override them
-        runtime_deps = stage.get("deps", op_node.deps)
-        op_node.deps = runtime_deps
         if "params" in stage:
-            op_node.func = lambda self, ctx, m=method, sc=stage: m(sc.get("params", {}))
         else:
-            op_node.func = lambda self, ctx, m=method: m()
         ops.append(op_node)
     return ops

 import threading
 import traceback
 from typing import Any, Callable, List
         self.name, self.deps, self.func = name, deps, func
 class Engine:
     def __init__(self, max_workers: int = 4):
         self.max_workers = max_workers
     def run(self, ops: List[OpNode], ctx: Context):
+        self._validate(ops)
         name2op = {operation.name: operation for operation in ops}
         # topological sort
                     return
                 try:
                     name2op[n].func(name2op[n], ctx)
+                except Exception:
                     exc[n] = traceback.format_exc()
                 done[n].set()
                 + "\n".join(f"---- {op} ----\n{tb}" for op, tb in exc.items())
             )
+    @staticmethod
+    def _validate(ops: List[OpNode]) -> None:
+        """
+        Check that operation names are unique and every dependency is known.
+
+        :param ops: operations to validate
+        :raises ValueError: if two operations share a name, or if an operation
+            lists a dependency that is not the name of any operation in ``ops``
+        """
+        name_set = set()
+        # first pass: collect all names, rejecting duplicates
+        for op in ops:
+            if op.name in name_set:
+                raise ValueError(f"Duplicate operation name: {op.name}")
+            name_set.add(op.name)
+        # second pass: runs after all names are collected, so a dependency may
+        # refer to an operation that appears later in the list
+        for op in ops:
+            for dep in op.deps:
+                if dep not in name_set:
+                    raise ValueError(
+                        f"Operation {op.name} has unknown dependency: {dep}"
+                    )
 def collect_ops(config: dict, graph_gen) -> List[OpNode]:
     """
     ops: List[OpNode] = []
     for stage in config["pipeline"]:
         name = stage["name"]
+        method_name = stage.get("op_key")
+        method = getattr(graph_gen, method_name)
+        deps = stage.get("deps", [])
         if "params" in stage:
+            def func(self, ctx, _method=method, _params=stage.get("params", {})):
+                return _method(_params)
         else:
+            def func(self, ctx, _method=method):
+                return _method()
+        op_node = OpNode(name=name, deps=deps, func=func)
         ops.append(op_node)
     return ops

graphgen/graphgen.py CHANGED Viewed

@@ -6,7 +6,6 @@ import gradio as gr
 from graphgen.bases import BaseLLMWrapper
 from graphgen.bases.datatypes import Chunk
-from graphgen.engine import op
 from graphgen.models import (
     JsonKVStorage,
     JsonListStorage,
@@ -89,7 +88,6 @@ class GraphGen:
         # webui
         self.progress_bar: gr.Progress = progress_bar
-    @op("read", deps=[])
     @async_to_sync_method
     async def read(self, read_config: Dict):
         """
@@ -116,7 +114,6 @@ class GraphGen:
         self.full_docs_storage.upsert(new_docs)
         self.full_docs_storage.index_done_callback()
-    @op("chunk", deps=["read"])
     @async_to_sync_method
     async def chunk(self, chunk_config: Dict):
         """
@@ -149,7 +146,6 @@ class GraphGen:
         self.meta_storage.mark_done(self.full_docs_storage)
         self.meta_storage.index_done_callback()
-    @op("build_kg", deps=["chunk"])
     @async_to_sync_method
     async def build_kg(self):
         """
@@ -180,7 +176,6 @@ class GraphGen:
         return _add_entities_and_relations
-    @op("search", deps=["read"])
     @async_to_sync_method
     async def search(self, search_config: Dict):
         logger.info("[Search] %s ...", ", ".join(search_config["data_sources"]))
@@ -206,7 +201,6 @@ class GraphGen:
         self.meta_storage.mark_done(self.full_docs_storage)
         self.meta_storage.index_done_callback()
-    @op("quiz_and_judge", deps=["build_kg"])
     @async_to_sync_method
     async def quiz_and_judge(self, quiz_and_judge_config: Dict):
         logger.warning(
@@ -247,7 +241,6 @@ class GraphGen:
         logger.info("Restarting synthesizer LLM client.")
         self.synthesizer_llm_client.restart()
-    @op("partition", deps=["build_kg"])
     @async_to_sync_method
     async def partition(self, partition_config: Dict):
         batches = await partition_kg(
@@ -259,7 +252,6 @@ class GraphGen:
         self.partition_storage.upsert(batches)
         return batches
-    @op("extract", deps=["chunk"])
     @async_to_sync_method
     async def extract(self, extract_config: Dict):
         logger.info("Extracting information from given chunks...")
@@ -279,7 +271,6 @@ class GraphGen:
         self.meta_storage.mark_done(self.chunks_storage)
         self.meta_storage.index_done_callback()
-    @op("generate", deps=["partition"])
     @async_to_sync_method
     async def generate(self, generate_config: Dict):

 from graphgen.bases import BaseLLMWrapper
 from graphgen.bases.datatypes import Chunk
 from graphgen.models import (
     JsonKVStorage,
     JsonListStorage,
         # webui
         self.progress_bar: gr.Progress = progress_bar
     @async_to_sync_method
     async def read(self, read_config: Dict):
         """
         self.full_docs_storage.upsert(new_docs)
         self.full_docs_storage.index_done_callback()
     @async_to_sync_method
     async def chunk(self, chunk_config: Dict):
         """
         self.meta_storage.mark_done(self.full_docs_storage)
         self.meta_storage.index_done_callback()
     @async_to_sync_method
     async def build_kg(self):
         """
         return _add_entities_and_relations
     @async_to_sync_method
     async def search(self, search_config: Dict):
         logger.info("[Search] %s ...", ", ".join(search_config["data_sources"]))
         self.meta_storage.mark_done(self.full_docs_storage)
         self.meta_storage.index_done_callback()
     @async_to_sync_method
     async def quiz_and_judge(self, quiz_and_judge_config: Dict):
         logger.warning(
         logger.info("Restarting synthesizer LLM client.")
         self.synthesizer_llm_client.restart()
     @async_to_sync_method
     async def partition(self, partition_config: Dict):
         batches = await partition_kg(
         self.partition_storage.upsert(batches)
         return batches
     @async_to_sync_method
     async def extract(self, extract_config: Dict):
         logger.info("Extracting information from given chunks...")
         self.meta_storage.mark_done(self.chunks_storage)
         self.meta_storage.index_done_callback()
     @async_to_sync_method
     async def generate(self, generate_config: Dict):

graphgen/operators/extract/extract_info.py CHANGED Viewed

@@ -31,7 +31,7 @@ async def extract_info(
     else:
         raise ValueError(f"Unsupported extraction method: {method}")
-    chunks = await chunk_storage.get_all()
     chunks = [{k: v} for k, v in chunks.items()]
     logger.info("Start extracting information from %d chunks", len(chunks))

     else:
         raise ValueError(f"Unsupported extraction method: {method}")
+    chunks = chunk_storage.get_all()
     chunks = [{k: v} for k, v in chunks.items()]
     logger.info("Start extracting information from %d chunks", len(chunks))

webui/app.py CHANGED Viewed

@@ -101,12 +101,15 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
     pipeline = [
         {
             "name": "read",
             "params": {
                 "input_file": params.upload_file,
             },
         },
         {
             "name": "chunk",
             "params": {
                 "chunk_size": params.chunk_size,
                 "chunk_overlap": params.chunk_overlap,
@@ -114,6 +117,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
         },
         {
             "name": "build_kg",
         },
     ]
@@ -121,6 +126,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
         pipeline.append(
             {
                 "name": "quiz_and_judge",
                 "params": {"quiz_samples": params.quiz_samples, "re_judge": True},
             }
         )
@@ -128,6 +135,7 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
             {
                 "name": "partition",
                 "deps": ["quiz_and_judge"],
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
@@ -138,6 +146,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
         pipeline.append(
             {
                 "name": "partition",
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
@@ -147,6 +157,8 @@ def run_graphgen(params: WebuiParams, progress=gr.Progress()):
     pipeline.append(
         {
             "name": "generate",
             "params": {
                 "method": params.mode,
                 "data_format": params.data_format,

     pipeline = [
         {
             "name": "read",
+            "op_key": "read",
             "params": {
                 "input_file": params.upload_file,
             },
         },
         {
             "name": "chunk",
+            "deps": ["read"],
+            "op_key": "chunk",
             "params": {
                 "chunk_size": params.chunk_size,
                 "chunk_overlap": params.chunk_overlap,
         },
         {
             "name": "build_kg",
+            "deps": ["chunk"],
+            "op_key": "build_kg",
         },
     ]
         pipeline.append(
             {
                 "name": "quiz_and_judge",
+                "deps": ["build_kg"],
+                "op_key": "quiz_and_judge",
                 "params": {"quiz_samples": params.quiz_samples, "re_judge": True},
             }
         )
             {
                 "name": "partition",
                 "deps": ["quiz_and_judge"],
+                "op_key": "partition",
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
         pipeline.append(
             {
                 "name": "partition",
+                "deps": ["build_kg"],
+                "op_key": "partition",
                 "params": {
                     "method": params.partition_method,
                     "method_params": partition_params,
     pipeline.append(
         {
             "name": "generate",
+            "deps": ["partition"],
+            "op_key": "generate",
             "params": {
                 "method": params.mode,
                 "data_format": params.data_format,