github-actions[bot] committed on
Commit
8e67692
·
1 Parent(s): 930dd4f

Auto-sync from demo at Fri Nov 21 06:02:41 UTC 2025

Browse files
graphgen/graphgen.py CHANGED
@@ -221,6 +221,7 @@ class GraphGen:
221
  self.graph_storage,
222
  self.rephrase_storage,
223
  max_samples,
 
224
  )
225
 
226
  # TODO: assert trainee_llm_client is valid before judge
 
221
  self.graph_storage,
222
  self.rephrase_storage,
223
  max_samples,
224
+ progress_bar=self.progress_bar,
225
  )
226
 
227
  # TODO: assert trainee_llm_client is valid before judge
graphgen/models/__init__.py CHANGED
@@ -4,6 +4,7 @@ from .generator import (
4
  AtomicGenerator,
5
  CoTGenerator,
6
  MultiHopGenerator,
 
7
  VQAGenerator,
8
  )
9
  from .kg_builder import LightRAGKGBuilder, MMKGBuilder
 
4
  AtomicGenerator,
5
  CoTGenerator,
6
  MultiHopGenerator,
7
+ QuizGenerator,
8
  VQAGenerator,
9
  )
10
  from .kg_builder import LightRAGKGBuilder, MMKGBuilder
graphgen/models/generator/__init__.py CHANGED
@@ -2,4 +2,5 @@ from .aggregated_generator import AggregatedGenerator
2
  from .atomic_generator import AtomicGenerator
3
  from .cot_generator import CoTGenerator
4
  from .multi_hop_generator import MultiHopGenerator
 
5
  from .vqa_generator import VQAGenerator
 
2
  from .atomic_generator import AtomicGenerator
3
  from .cot_generator import CoTGenerator
4
  from .multi_hop_generator import MultiHopGenerator
5
+ from .quiz_generator import QuizGenerator
6
  from .vqa_generator import VQAGenerator
graphgen/models/generator/quiz_generator.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ from graphgen.bases import BaseGenerator
4
+ from graphgen.templates import DESCRIPTION_REPHRASING_PROMPT
5
+ from graphgen.utils import detect_main_language, logger
6
+
7
+
8
class QuizGenerator(BaseGenerator):
    """
    Generator that turns a description into a rephrasing prompt and cleans
    up the text returned for that prompt.
    """

    @staticmethod
    def build_prompt(
        batch: tuple[list[tuple[str, dict]], list[tuple[Any, Any, dict]]]
    ) -> str:
        """
        Build the rephrasing prompt for a (nodes, edges) batch.

        Only one element is consulted: the first node when there are nodes,
        otherwise the first edge. Its data dict supplies "description"
        (default "") and "template_type" (default "TEMPLATE").

        :param batch: A (nodes, edges) tuple; node data is at index 1 of a
            node tuple, edge data at index 2 of an edge tuple
        :return: Prompt string
        :raises ValueError: if both nodes and edges are empty
        """
        nodes, edges = batch
        if nodes:
            payload = nodes[0][1]
        elif edges:
            payload = edges[0][2]
        else:
            raise ValueError("Batch must contain at least one node or edge with description")

        return QuizGenerator.build_prompt_for_description(
            payload.get("description", ""),
            payload.get("template_type", "TEMPLATE"),
        )

    @staticmethod
    def build_prompt_for_description(description: str, template_type: str = "TEMPLATE") -> str:
        """
        Build the rephrasing prompt for one description.

        The template is looked up first by the language reported by
        detect_main_language for the description, then by template_type.

        :param description: The description to rephrase
        :param template_type: Either "TEMPLATE" (same meaning) or "ANTI_TEMPLATE" (opposite meaning)
        :return: Prompt string with the description substituted as input_sentence
        """
        templates = DESCRIPTION_REPHRASING_PROMPT[detect_main_language(description)]
        return templates[template_type].format(input_sentence=description)

    @staticmethod
    def parse_rephrased_text(response: str) -> str:
        """
        Extract the rephrased sentence from a raw response.

        :param response: Raw response text
        :return: The response with surrounding whitespace removed first and
            then any leading/trailing double quotes removed
        """
        cleaned = response.strip()
        cleaned = cleaned.strip('"')
        logger.debug("Rephrased Text: %s", cleaned)
        return cleaned

    @staticmethod
    def parse_response(response: str) -> Any:
        """
        Parse the LLM response; delegates to parse_rephrased_text.

        :param response: LLM response
        :return: Rephrased text
        """
        return QuizGenerator.parse_rephrased_text(response)
graphgen/operators/__init__.py CHANGED
@@ -2,9 +2,8 @@ from .build_kg import build_kg
2
  from .extract import extract_info
3
  from .generate import generate_qas
4
  from .init import init_llm
5
- from .judge import judge_statement
6
  from .partition import partition_kg
7
- from .quiz import quiz
8
  from .read import read_files
9
  from .search import search_all
10
  from .split import chunk_documents
 
2
  from .extract import extract_info
3
  from .generate import generate_qas
4
  from .init import init_llm
 
5
  from .partition import partition_kg
6
+ from .quiz_and_judge import judge_statement, quiz
7
  from .read import read_files
8
  from .search import search_all
9
  from .split import chunk_documents
graphgen/operators/generate/generate_qas.py CHANGED
@@ -1,5 +1,7 @@
1
  from typing import Any
2
 
 
 
3
  from graphgen.bases import BaseLLMWrapper
4
  from graphgen.models import (
5
  AggregatedGenerator,
@@ -19,7 +21,7 @@ async def generate_qas(
19
  ]
20
  ],
21
  generation_config: dict,
22
- progress_bar=None,
23
  ) -> list[dict[str, Any]]:
24
  """
25
  Generate question-answer pairs based on nodes and edges.
 
1
  from typing import Any
2
 
3
+ import gradio as gr
4
+
5
  from graphgen.bases import BaseLLMWrapper
6
  from graphgen.models import (
7
  AggregatedGenerator,
 
21
  ]
22
  ],
23
  generation_config: dict,
24
+ progress_bar: gr.Progress = None,
25
  ) -> list[dict[str, Any]]:
26
  """
27
  Generate question-answer pairs based on nodes and edges.
graphgen/operators/quiz_and_judge/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .judge import judge_statement
2
+ from .quiz import quiz
graphgen/operators/{judge.py → quiz_and_judge/judge.py} RENAMED
File without changes
graphgen/operators/{quiz.py → quiz_and_judge/quiz.py} RENAMED
@@ -1,12 +1,10 @@
1
- import asyncio
2
  from collections import defaultdict
3
 
4
- from tqdm.asyncio import tqdm as tqdm_async
5
 
6
  from graphgen.bases import BaseLLMWrapper
7
- from graphgen.models import JsonKVStorage, NetworkXStorage
8
- from graphgen.templates import DESCRIPTION_REPHRASING_PROMPT
9
- from graphgen.utils import detect_main_language, logger
10
 
11
 
12
  async def quiz(
@@ -14,104 +12,76 @@ async def quiz(
14
  graph_storage: NetworkXStorage,
15
  rephrase_storage: JsonKVStorage,
16
  max_samples: int = 1,
17
- max_concurrent: int = 1000,
18
  ) -> JsonKVStorage:
19
  """
20
- Get all edges and quiz them
21
 
22
  :param synth_llm_client: generate statements
23
  :param graph_storage: graph storage instance
24
  :param rephrase_storage: rephrase storage instance
25
  :param max_samples: max samples for each edge
26
- :param max_concurrent: max concurrent
27
  :return:
28
  """
29
 
30
- semaphore = asyncio.Semaphore(max_concurrent)
31
 
32
- async def _process_single_quiz(des: str, prompt: str, gt: str):
33
- async with semaphore:
34
- try:
35
- # 如果在rephrase_storage中已经存在,直接取出
36
- descriptions = await rephrase_storage.get_by_id(des)
37
- if descriptions:
38
- return None
39
 
40
- new_description = await synth_llm_client.generate_answer(
41
- prompt, temperature=1
42
- )
43
- return {des: [(new_description, gt)]}
 
 
44
 
45
- except Exception as e: # pylint: disable=broad-except
46
- logger.error("Error when quizzing description %s: %s", des, e)
47
- return None
48
 
49
  edges = await graph_storage.get_all_edges()
50
  nodes = await graph_storage.get_all_nodes()
51
 
52
  results = defaultdict(list)
53
- tasks = []
54
  for edge in edges:
55
  edge_data = edge[2]
56
-
57
  description = edge_data["description"]
58
- language = "English" if detect_main_language(description) == "en" else "Chinese"
59
 
60
  results[description] = [(description, "yes")]
61
 
62
  for i in range(max_samples):
63
  if i > 0:
64
- tasks.append(
65
- _process_single_quiz(
66
- description,
67
- DESCRIPTION_REPHRASING_PROMPT[language]["TEMPLATE"].format(
68
- input_sentence=description
69
- ),
70
- "yes",
71
- )
72
- )
73
- tasks.append(
74
- _process_single_quiz(
75
- description,
76
- DESCRIPTION_REPHRASING_PROMPT[language]["ANTI_TEMPLATE"].format(
77
- input_sentence=description
78
- ),
79
- "no",
80
- )
81
- )
82
 
83
  for node in nodes:
84
  node_data = node[1]
85
  description = node_data["description"]
86
- language = "English" if detect_main_language(description) == "en" else "Chinese"
87
 
88
  results[description] = [(description, "yes")]
89
 
90
  for i in range(max_samples):
91
  if i > 0:
92
- tasks.append(
93
- _process_single_quiz(
94
- description,
95
- DESCRIPTION_REPHRASING_PROMPT[language]["TEMPLATE"].format(
96
- input_sentence=description
97
- ),
98
- "yes",
99
- )
100
- )
101
- tasks.append(
102
- _process_single_quiz(
103
- description,
104
- DESCRIPTION_REPHRASING_PROMPT[language]["ANTI_TEMPLATE"].format(
105
- input_sentence=description
106
- ),
107
- "no",
108
- )
109
- )
110
-
111
- for result in tqdm_async(
112
- asyncio.as_completed(tasks), total=len(tasks), desc="Quizzing descriptions"
113
- ):
114
- new_result = await result
115
  if new_result:
116
  for key, value in new_result.items():
117
  results[key].extend(value)
 
 
1
  from collections import defaultdict
2
 
3
+ import gradio as gr
4
 
5
  from graphgen.bases import BaseLLMWrapper
6
+ from graphgen.models import JsonKVStorage, NetworkXStorage, QuizGenerator
7
+ from graphgen.utils import logger, run_concurrent
 
8
 
9
 
10
  async def quiz(
 
12
  graph_storage: NetworkXStorage,
13
  rephrase_storage: JsonKVStorage,
14
  max_samples: int = 1,
15
+ progress_bar: gr.Progress = None,
16
  ) -> JsonKVStorage:
17
  """
18
+ Get all edges and quiz them using QuizGenerator.
19
 
20
  :param synth_llm_client: generate statements
21
  :param graph_storage: graph storage instance
22
  :param rephrase_storage: rephrase storage instance
23
  :param max_samples: max samples for each edge
24
+ :param progress_bar
25
  :return:
26
  """
27
 
28
+ generator = QuizGenerator(synth_llm_client)
29
 
30
+ async def _process_single_quiz(item: tuple[str, str, str]):
31
+ description, template_type, gt = item
32
+ try:
33
+ # if rephrase_storage exists already, directly get it
34
+ descriptions = await rephrase_storage.get_by_id(description)
35
+ if descriptions:
36
+ return None
37
 
38
+ prompt = generator.build_prompt_for_description(description, template_type)
39
+ new_description = await synth_llm_client.generate_answer(
40
+ prompt, temperature=1
41
+ )
42
+ rephrased_text = generator.parse_rephrased_text(new_description)
43
+ return {description: [(rephrased_text, gt)]}
44
 
45
+ except Exception as e: # pylint: disable=broad-except
46
+ logger.error("Error when quizzing description %s: %s", description, e)
47
+ return None
48
 
49
  edges = await graph_storage.get_all_edges()
50
  nodes = await graph_storage.get_all_nodes()
51
 
52
  results = defaultdict(list)
53
+ items = []
54
  for edge in edges:
55
  edge_data = edge[2]
 
56
  description = edge_data["description"]
 
57
 
58
  results[description] = [(description, "yes")]
59
 
60
  for i in range(max_samples):
61
  if i > 0:
62
+ items.append((description, "TEMPLATE", "yes"))
63
+ items.append((description, "ANTI_TEMPLATE", "no"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  for node in nodes:
66
  node_data = node[1]
67
  description = node_data["description"]
 
68
 
69
  results[description] = [(description, "yes")]
70
 
71
  for i in range(max_samples):
72
  if i > 0:
73
+ items.append((description, "TEMPLATE", "yes"))
74
+ items.append((description, "ANTI_TEMPLATE", "no"))
75
+
76
+ quiz_results = await run_concurrent(
77
+ _process_single_quiz,
78
+ items,
79
+ desc="Quizzing descriptions",
80
+ unit="description",
81
+ progress_bar=progress_bar,
82
+ )
83
+
84
+ for new_result in quiz_results:
 
 
 
 
 
 
 
 
 
 
 
85
  if new_result:
86
  for key, value in new_result.items():
87
  results[key].extend(value)
graphgen/templates/description_rephrasing.py CHANGED
@@ -110,11 +110,11 @@ Output:
110
 
111
 
112
  DESCRIPTION_REPHRASING_PROMPT= {
113
- "English": {
114
  "ANTI_TEMPLATE": ANTI_TEMPLATE_EN,
115
  "TEMPLATE": TEMPLATE_EN
116
  },
117
- "Chinese": {
118
  "ANTI_TEMPLATE": ANTI_TEMPLATE_ZH,
119
  "TEMPLATE": TEMPLATE_ZH
120
  }
 
110
 
111
 
112
  DESCRIPTION_REPHRASING_PROMPT= {
113
+ "en": {
114
  "ANTI_TEMPLATE": ANTI_TEMPLATE_EN,
115
  "TEMPLATE": TEMPLATE_EN
116
  },
117
+ "zh": {
118
  "ANTI_TEMPLATE": ANTI_TEMPLATE_ZH,
119
  "TEMPLATE": TEMPLATE_ZH
120
  }