github-actions[bot] committed
Commit · d02622b
1 Parent(s): d2af8b0
Auto-sync from demo at Wed Oct 29 11:25:28 UTC 2025
- graphgen/bases/__init__.py +1 -1
- graphgen/bases/base_generator.py +2 -2
- graphgen/bases/base_kg_builder.py +2 -2
- graphgen/bases/{base_llm_client.py → base_llm_wrapper.py} +7 -1
- graphgen/graphgen.py +35 -30
- graphgen/models/__init__.py +1 -2
- graphgen/models/kg_builder/light_rag_kg_builder.py +2 -2
- graphgen/models/llm/__init__.py +4 -0
- graphgen/models/llm/api/__init__.py +0 -0
- graphgen/models/llm/api/http_client.py +197 -0
- graphgen/models/llm/api/ollama_client.py +105 -0
- graphgen/models/llm/{openai_client.py → api/openai_client.py} +4 -4
- graphgen/models/llm/local/__init__.py +0 -0
- graphgen/models/llm/local/hf_wrapper.py +147 -0
- graphgen/models/llm/local/sglang_wrapper.py +148 -0
- graphgen/models/llm/local/tgi_wrapper.py +36 -0
- graphgen/models/llm/{ollama_client.py → local/trt_wrapper.py} +8 -3
- graphgen/models/llm/local/vllm_wrapper.py +137 -0
- graphgen/models/llm/topk_token_model.py +0 -53
- graphgen/operators/__init__.py +1 -0
- graphgen/operators/build_kg/build_mm_kg.py +3 -2
- graphgen/operators/build_kg/build_text_kg.py +3 -2
- graphgen/operators/generate/generate_qas.py +2 -2
- graphgen/operators/init/__init__.py +1 -0
- graphgen/operators/init/init_llm.py +84 -0
- graphgen/operators/judge.py +3 -2
- graphgen/operators/quiz.py +3 -2
- requirements.txt +1 -1
graphgen/bases/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 from .base_generator import BaseGenerator
 from .base_kg_builder import BaseKGBuilder
-from .base_llm_client import BaseLLMClient
+from .base_llm_wrapper import BaseLLMWrapper
 from .base_partitioner import BasePartitioner
 from .base_reader import BaseReader
 from .base_splitter import BaseSplitter

graphgen/bases/base_generator.py
CHANGED
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from graphgen.bases.base_llm_client import BaseLLMClient
+from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
 
 
 class BaseGenerator(ABC):
@@ -9,7 +9,7 @@ class BaseGenerator(ABC):
     Generate QAs based on given prompts.
     """
 
-    def __init__(self, llm_client: BaseLLMClient):
+    def __init__(self, llm_client: BaseLLMWrapper):
         self.llm_client = llm_client
 
     @staticmethod

graphgen/bases/base_kg_builder.py
CHANGED
@@ -2,13 +2,13 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from typing import Dict, List, Tuple
 
-from graphgen.bases.base_llm_client import BaseLLMClient
+from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Chunk
 
 
 class BaseKGBuilder(ABC):
-    def __init__(self, llm_client: BaseLLMClient):
+    def __init__(self, llm_client: BaseLLMWrapper):
         self.llm_client = llm_client
         self._nodes: Dict[str, List[dict]] = defaultdict(list)
         self._edges: Dict[Tuple[str, str], List[dict]] = defaultdict(list)

graphgen/bases/{base_llm_client.py → base_llm_wrapper.py}
RENAMED
@@ -8,7 +8,7 @@ from graphgen.bases.base_tokenizer import BaseTokenizer
 from graphgen.bases.datatypes import Token
 
 
-class BaseLLMClient(abc.ABC):
+class BaseLLMWrapper(abc.ABC):
     """
     LLM client base class, agnostic to specific backends (OpenAI / Ollama / ...).
     """
@@ -66,3 +66,9 @@ class BaseLLMClient(abc.ABC):
         think_pattern = re.compile(rf"<{think_tag}>.*?</{think_tag}>", re.DOTALL)
         filtered_text = think_pattern.sub("", text).strip()
         return filtered_text if filtered_text else text.strip()
+
+    def shutdown(self) -> None:
+        """Shutdown the LLM engine if applicable."""
+
+    def restart(self) -> None:
+        """Reinitialize the LLM engine if applicable."""

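Note: `shutdown()` and `restart()` are added to the base class as no-op hooks, so API-backed clients inherit them unchanged while local engines (see the SGLang wrapper below) override them. A minimal sketch of a custom backend against this interface, assuming the three `generate_*` coroutines shown in the wrappers below are the full abstract contract; `EchoWrapper` is illustrative only and not part of this commit:

from typing import Any, List, Optional

from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
from graphgen.bases.datatypes import Token


class EchoWrapper(BaseLLMWrapper):
    """Toy backend that echoes the prompt back; handy for wiring tests."""

    async def generate_answer(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> str:
        # A real backend would call its inference engine here.
        return f"echo: {text}"

    async def generate_topk_per_token(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        raise NotImplementedError("EchoWrapper exposes no token-level probabilities.")

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        raise NotImplementedError

    # shutdown() / restart() are inherited as no-ops from BaseLLMWrapper.
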
graphgen/graphgen.py
CHANGED
@@ -1,11 +1,11 @@
 import asyncio
 import os
 import time
-from dataclasses import dataclass
 from typing import Dict, cast
 
 import gradio as gr
 
+from graphgen.bases import BaseLLMWrapper
 from graphgen.bases.base_storage import StorageNameSpace
 from graphgen.bases.datatypes import Chunk
 from graphgen.models import (
@@ -20,6 +20,7 @@ from graphgen.operators import (
     build_text_kg,
     chunk_documents,
     generate_qas,
+    init_llm,
     judge_statement,
     partition_kg,
     quiz,
@@ -31,40 +32,28 @@ from graphgen.utils import async_to_sync_method, compute_mm_hash, logger
 sys_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 
 
-@dataclass
 class GraphGen:
-    [13 deleted lines not rendered in this view]
+    def __init__(
+        self,
+        unique_id: int = int(time.time()),
+        working_dir: str = os.path.join(sys_path, "cache"),
+        tokenizer_instance: Tokenizer = None,
+        synthesizer_llm_client: OpenAIClient = None,
+        trainee_llm_client: OpenAIClient = None,
+        progress_bar: gr.Progress = None,
+    ):
+        self.unique_id: int = unique_id
+        self.working_dir: str = working_dir
+
+        # llm
+        self.tokenizer_instance: Tokenizer = tokenizer_instance or Tokenizer(
             model_name=os.getenv("TOKENIZER_MODEL")
         )
 
-        self.synthesizer_llm_client: OpenAIClient = (
-            self.synthesizer_llm_client
-            or OpenAIClient(
-                model_name=os.getenv("SYNTHESIZER_MODEL"),
-                api_key=os.getenv("SYNTHESIZER_API_KEY"),
-                base_url=os.getenv("SYNTHESIZER_BASE_URL"),
-                tokenizer=self.tokenizer_instance,
-            )
-        )
-
-        self.trainee_llm_client: OpenAIClient = self.trainee_llm_client or OpenAIClient(
-            model_name=os.getenv("TRAINEE_MODEL"),
-            api_key=os.getenv("TRAINEE_API_KEY"),
-            base_url=os.getenv("TRAINEE_BASE_URL"),
-            tokenizer=self.tokenizer_instance,
+        self.synthesizer_llm_client: BaseLLMWrapper = (
+            synthesizer_llm_client or init_llm("synthesizer")
         )
+        self.trainee_llm_client: BaseLLMWrapper = trainee_llm_client
 
         self.full_docs_storage: JsonKVStorage = JsonKVStorage(
             self.working_dir, namespace="full_docs"
@@ -86,6 +75,9 @@ class GraphGen:
             namespace="qa",
         )
 
+        # webui
+        self.progress_bar: gr.Progress = progress_bar
+
     @async_to_sync_method
     async def insert(self, read_config: Dict, split_config: Dict):
         """
@@ -272,6 +264,12 @@
         )
 
         # TODO: assert trainee_llm_client is valid before judge
+        if not self.trainee_llm_client:
+            # TODO: shutdown existing synthesizer_llm_client properly
+            logger.info("No trainee LLM client provided, initializing a new one.")
+            self.synthesizer_llm_client.shutdown()
+            self.trainee_llm_client = init_llm("trainee")
+
         re_judge = quiz_and_judge_config["re_judge"]
         _update_relations = await judge_statement(
             self.trainee_llm_client,
@@ -279,9 +277,16 @@
             self.rephrase_storage,
             re_judge,
         )
+
         await self.rephrase_storage.index_done_callback()
         await _update_relations.index_done_callback()
 
+        logger.info("Shutting down trainee LLM client.")
+        self.trainee_llm_client.shutdown()
+        self.trainee_llm_client = None
+        logger.info("Restarting synthesizer LLM client.")
+        self.synthesizer_llm_client.restart()
+
     @async_to_sync_method
     async def generate(self, partition_config: Dict, generate_config: Dict):
         # Step 1: partition the graph

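Note: with this change `GraphGen` no longer hard-codes two `OpenAIClient` instances; when no client is passed in, the synthesizer comes from `init_llm("synthesizer")` and the trainee is created lazily (and shut down again) around the judge step. A rough construction sketch, assuming the `SYNTHESIZER_*` variables are consumed by `init_llm` as shown in `graphgen/operators/init/init_llm.py` below; the model name and endpoint are placeholders:

import os

from graphgen.graphgen import GraphGen

# Consumed by init_llm("synthesizer") -> LLMFactory -> HTTPClient(**config)
os.environ["TOKENIZER_MODEL"] = "cl100k_base"
os.environ["SYNTHESIZER_BACKEND"] = "http_api"
os.environ["SYNTHESIZER_MODEL"] = "qwen2.5-7b-instruct"
os.environ["SYNTHESIZER_BASE_URL"] = "http://localhost:8080/v1"
os.environ["SYNTHESIZER_API_KEY"] = "EMPTY"

gg = GraphGen(working_dir="cache")   # synthesizer_llm_client is built from the environment
# gg.insert(read_config, split_config)
# the judge step later builds the trainee client from TRAINEE_* variables if none was passed
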
graphgen/models/__init__.py
CHANGED
@@ -7,8 +7,7 @@ from .generator import (
     VQAGenerator,
 )
 from .kg_builder import LightRAGKGBuilder, MMKGBuilder
-from .llm.openai_client import OpenAIClient
-from .llm.topk_token_model import TopkTokenModel
+from .llm import HTTPClient, OllamaClient, OpenAIClient
 from .partitioner import (
     AnchorBFSPartitioner,
     BFSPartitioner,

graphgen/models/kg_builder/light_rag_kg_builder.py
CHANGED
@@ -2,7 +2,7 @@ import re
 from collections import Counter, defaultdict
 from typing import Dict, List, Tuple
 
-from graphgen.bases import BaseGraphStorage, BaseKGBuilder, BaseLLMClient, Chunk
+from graphgen.bases import BaseGraphStorage, BaseKGBuilder, BaseLLMWrapper, Chunk
 from graphgen.templates import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT
 from graphgen.utils import (
     detect_main_language,
@@ -15,7 +15,7 @@ from graphgen.utils import (
 
 
 class LightRAGKGBuilder(BaseKGBuilder):
-    def __init__(self, llm_client: BaseLLMClient, max_loop: int = 3):
+    def __init__(self, llm_client: BaseLLMWrapper, max_loop: int = 3):
         super().__init__(llm_client)
         self.max_loop = max_loop
 

graphgen/models/llm/__init__.py
CHANGED
@@ -0,0 +1,4 @@
+from .api.http_client import HTTPClient
+from .api.ollama_client import OllamaClient
+from .api.openai_client import OpenAIClient
+from .local.hf_wrapper import HuggingFaceWrapper

graphgen/models/llm/api/__init__.py
ADDED
File without changes
graphgen/models/llm/api/http_client.py
ADDED
@@ -0,0 +1,197 @@
import asyncio
import math
from typing import Any, Dict, List, Optional

import aiohttp
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
from graphgen.bases.datatypes import Token
from graphgen.models.llm.limitter import RPM, TPM


class HTTPClient(BaseLLMWrapper):
    """
    A generic async HTTP client for LLMs compatible with OpenAI's chat/completions format.
    It uses aiohttp for making requests and includes retry logic and token usage tracking.
    Usage example:
        client = HTTPClient(
            model_name="gpt-4o-mini",
            base_url="http://localhost:8080",
            api_key="your_api_key",
            json_mode=True,
            seed=42,
            topk_per_token=5,
            request_limit=True,
        )

        answer = await client.generate_answer("Hello, world!")
        tokens = await client.generate_topk_per_token("Hello, world!")
    """

    _instance: Optional["HTTPClient"] = None
    _lock = asyncio.Lock()

    def __new__(cls, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(
        self,
        *,
        model: str,
        base_url: str,
        api_key: Optional[str] = None,
        json_mode: bool = False,
        seed: Optional[int] = None,
        topk_per_token: int = 5,
        request_limit: bool = False,
        rpm: Optional[RPM] = None,
        tpm: Optional[TPM] = None,
        **kwargs: Any,
    ):
        # Initialize only once in the singleton pattern
        if getattr(self, "_initialized", False):
            return
        self._initialized: bool = True
        super().__init__(**kwargs)
        self.model_name = model
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.json_mode = json_mode
        self.seed = seed
        self.topk_per_token = topk_per_token
        self.request_limit = request_limit
        self.rpm = rpm or RPM()
        self.tpm = tpm or TPM()

        self.token_usage: List[Dict[str, int]] = []
        self._session: Optional[aiohttp.ClientSession] = None

    @property
    def session(self) -> aiohttp.ClientSession:
        if self._session is None or self._session.closed:
            headers = (
                {"Authorization": f"Bearer {self.api_key}"} if self.api_key else {}
            )
            self._session = aiohttp.ClientSession(headers=headers)
        return self._session

    async def close(self):
        if self._session and not self._session.closed:
            await self._session.close()

    def _build_body(self, text: str, history: List[str]) -> Dict[str, Any]:
        messages = []
        if self.system_prompt:
            messages.append({"role": "system", "content": self.system_prompt})

        # chatml format: alternating user and assistant messages
        if history and isinstance(history[0], dict):
            messages.extend(history)

        messages.append({"role": "user", "content": text})

        body = {
            "model": self.model_name,
            "messages": messages,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "max_tokens": self.max_tokens,
        }
        if self.seed:
            body["seed"] = self.seed
        if self.json_mode:
            body["response_format"] = {"type": "json_object"}
        return body

    @retry(
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError)),
    )
    async def generate_answer(
        self,
        text: str,
        history: Optional[List[str]] = None,
        **extra: Any,
    ) -> str:
        body = self._build_body(text, history or [])
        prompt_tokens = sum(
            len(self.tokenizer.encode(m["content"])) for m in body["messages"]
        )
        est = prompt_tokens + body["max_tokens"]

        if self.request_limit:
            await self.rpm.wait(silent=True)
            await self.tpm.wait(est, silent=True)

        async with self.session.post(
            f"{self.base_url}/chat/completions",
            json=body,
            timeout=aiohttp.ClientTimeout(total=60),
        ) as resp:
            resp.raise_for_status()
            data = await resp.json()

        msg = data["choices"][0]["message"]["content"]
        if "usage" in data:
            self.token_usage.append(
                {
                    "prompt_tokens": data["usage"]["prompt_tokens"],
                    "completion_tokens": data["usage"]["completion_tokens"],
                    "total_tokens": data["usage"]["total_tokens"],
                }
            )
        return self.filter_think_tags(msg)

    @retry(
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError)),
    )
    async def generate_topk_per_token(
        self,
        text: str,
        history: Optional[List[str]] = None,
        **extra: Any,
    ) -> List[Token]:
        body = self._build_body(text, history or [])
        body["max_tokens"] = 1
        if self.topk_per_token > 0:
            body["logprobs"] = True
            body["top_logprobs"] = self.topk_per_token

        async with self.session.post(
            f"{self.base_url}/chat/completions",
            json=body,
            timeout=aiohttp.ClientTimeout(total=60),
        ) as resp:
            resp.raise_for_status()
            data = await resp.json()

        token_logprobs = data["choices"][0]["logprobs"]["content"]
        tokens = []
        for item in token_logprobs:
            candidates = [
                Token(t["token"], math.exp(t["logprob"])) for t in item["top_logprobs"]
            ]
            tokens.append(
                Token(
                    item["token"], math.exp(item["logprob"]), top_candidates=candidates
                )
            )
        return tokens

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        raise NotImplementedError(
            "generate_inputs_prob is not implemented in HTTPClient"
        )

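Note: `HTTPClient.__init__` is keyword-only and takes `model=`, while the class docstring above writes `model_name=`; the sketch below follows the actual signature. It is a usage sketch only: the endpoint, key and model name are placeholders, and a tokenizer is passed explicitly because `generate_answer` calls `self.tokenizer.encode(...)` (in the pipeline this kwarg is injected by `LLMFactory`). The class is also a process-wide singleton, so the first construction wins.

import asyncio

from graphgen.models import Tokenizer
from graphgen.models.llm.api.http_client import HTTPClient


async def main() -> None:
    client = HTTPClient(
        model="qwen2.5-7b-instruct",          # placeholder model name
        base_url="http://localhost:8080/v1",  # any OpenAI-compatible server
        api_key="EMPTY",
        tokenizer=Tokenizer("cl100k_base"),   # mirrors what LLMFactory injects
    )
    try:
        print(await client.generate_answer("Name three graph partitioning algorithms."))
        print(client.token_usage)             # filled when the server reports usage
    finally:
        await client.close()                  # close the shared aiohttp session


asyncio.run(main())
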
graphgen/models/llm/api/ollama_client.py
ADDED
@@ -0,0 +1,105 @@
from typing import Any, Dict, List, Optional

from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
from graphgen.bases.datatypes import Token
from graphgen.models.llm.limitter import RPM, TPM


class OllamaClient(BaseLLMWrapper):
    """
    Requires a local or remote Ollama server to be running (default port 11434).
    The top_logprobs field is not yet implemented by the official API.
    """

    def __init__(
        self,
        *,
        model: str = "gemma3",
        base_url: str = "http://localhost:11434",
        json_mode: bool = False,
        seed: Optional[int] = None,
        topk_per_token: int = 5,
        request_limit: bool = False,
        rpm: Optional[RPM] = None,
        tpm: Optional[TPM] = None,
        **kwargs: Any,
    ):
        try:
            import ollama
        except ImportError as e:
            raise ImportError(
                "Ollama SDK is not installed."
                "It is required to use OllamaClient."
                "Please install it with `pip install ollama`."
            ) from e
        super().__init__(**kwargs)
        self.model_name = model
        self.base_url = base_url
        self.json_mode = json_mode
        self.seed = seed
        self.topk_per_token = topk_per_token
        self.request_limit = request_limit
        self.rpm = rpm or RPM()
        self.tpm = tpm or TPM()
        self.token_usage: List[Dict[str, int]] = []

        self.client = ollama.AsyncClient(host=self.base_url)

    async def generate_answer(
        self,
        text: str,
        history: Optional[List[Dict[str, str]]] = None,
        **extra: Any,
    ) -> str:
        messages = []
        if self.system_prompt:
            messages.append({"role": "system", "content": self.system_prompt})
        if history:
            messages.extend(history)
        messages.append({"role": "user", "content": text})

        options = {
            "temperature": self.temperature,
            "top_p": self.top_p,
            "num_predict": self.max_tokens,
        }
        if self.seed is not None:
            options["seed"] = self.seed

        prompt_tokens = sum(len(self.tokenizer.encode(m["content"])) for m in messages)
        est = prompt_tokens + self.max_tokens
        if self.request_limit:
            await self.rpm.wait(silent=True)
            await self.tpm.wait(est, silent=True)

        response = await self.client.chat(
            model=self.model_name,
            messages=messages,
            format="json" if self.json_mode else "",
            options=options,
            stream=False,
        )

        usage = response.get("prompt_eval_count", 0), response.get("eval_count", 0)
        self.token_usage.append(
            {
                "prompt_tokens": usage[0],
                "completion_tokens": usage[1],
                "total_tokens": sum(usage),
            }
        )
        content = response["message"]["content"]
        return self.filter_think_tags(content)

    async def generate_topk_per_token(
        self,
        text: str,
        history: Optional[List[Dict[str, str]]] = None,
        **extra: Any,
    ) -> List[Token]:
        raise NotImplementedError("Ollama API does not support per-token top-k yet.")

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[Dict[str, str]]] = None, **extra: Any
    ) -> List[Token]:
        raise NotImplementedError("Ollama API does not support per-token logprobs yet.")

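Note: a short usage sketch, assuming an Ollama server on the default port with the model already pulled; as above, the tokenizer kwarg mirrors what `LLMFactory` injects because request accounting calls `self.tokenizer.encode(...)`.

import asyncio

from graphgen.models import Tokenizer
from graphgen.models.llm.api.ollama_client import OllamaClient


async def main() -> None:
    client = OllamaClient(
        model="gemma3",                       # e.g. run `ollama pull gemma3` beforehand
        base_url="http://localhost:11434",
        tokenizer=Tokenizer("cl100k_base"),
    )
    print(await client.generate_answer("Summarize what a knowledge graph is."))


asyncio.run(main())
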
graphgen/models/llm/{openai_client.py → api/openai_client.py}
RENAMED
@@ -10,7 +10,7 @@ from tenacity import (
     wait_exponential,
 )
 
-from graphgen.bases.base_llm_client import BaseLLMClient
+from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
 from graphgen.bases.datatypes import Token
 from graphgen.models.llm.limitter import RPM, TPM
 
@@ -28,7 +28,7 @@ def get_top_response_tokens(response: openai.ChatCompletion) -> List[Token]:
     return tokens
 
 
-class OpenAIClient(BaseLLMClient):
+class OpenAIClient(BaseLLMWrapper):
     def __init__(
         self,
         *,
@@ -105,8 +105,8 @@ class OpenAIClient(BaseLLMClient):
         kwargs["logprobs"] = True
         kwargs["top_logprobs"] = self.topk_per_token
 
-        # Limit max_tokens to …
-        kwargs["max_tokens"] = …
+        # Limit max_tokens to 1 to avoid long completions
+        kwargs["max_tokens"] = 1
 
         completion = await self.client.chat.completions.create(  # pylint: disable=E1125
             model=self.model_name, **kwargs

graphgen/models/llm/local/__init__.py
ADDED
File without changes
graphgen/models/llm/local/hf_wrapper.py
ADDED
@@ -0,0 +1,147 @@
from typing import Any, List, Optional

from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
from graphgen.bases.datatypes import Token


class HuggingFaceWrapper(BaseLLMWrapper):
    """
    Async inference backend based on HuggingFace Transformers
    """

    def __init__(
        self,
        model: str,
        torch_dtype="auto",
        device_map="auto",
        trust_remote_code=True,
        temperature=0.0,
        top_p=1.0,
        topk=5,
        **kwargs: Any,
    ):
        super().__init__(temperature=temperature, top_p=top_p, **kwargs)

        try:
            import torch
            from transformers import (
                AutoModelForCausalLM,
                AutoTokenizer,
                GenerationConfig,
            )
        except ImportError as exc:
            raise ImportError(
                "HuggingFaceWrapper requires torch, transformers and accelerate. "
                "Install them with: pip install torch transformers accelerate"
            ) from exc

        self.torch = torch
        self.AutoTokenizer = AutoTokenizer
        self.AutoModelForCausalLM = AutoModelForCausalLM
        self.GenerationConfig = GenerationConfig

        self.tokenizer = AutoTokenizer.from_pretrained(
            model, trust_remote_code=trust_remote_code
        )
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = AutoModelForCausalLM.from_pretrained(
            model,
            torch_dtype=torch_dtype,
            device_map=device_map,
            trust_remote_code=trust_remote_code,
        )
        self.model.eval()
        self.temperature = temperature
        self.top_p = top_p
        self.topk = topk

    @staticmethod
    def _build_inputs(prompt: str, history: Optional[List[str]] = None) -> str:
        msgs = history or []
        lines = []
        for m in msgs:
            if isinstance(m, dict):
                role = m.get("role", "")
                content = m.get("content", "")
                lines.append(f"{role}: {content}")
            else:
                lines.append(str(m))
        lines.append(prompt)
        return "\n".join(lines)

    async def generate_answer(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> str:
        full = self._build_inputs(text, history)
        inputs = self.tokenizer(full, return_tensors="pt").to(self.model.device)

        gen_kwargs = {
            "max_new_tokens": extra.get("max_new_tokens", 512),
            "do_sample": self.temperature > 0,
            "temperature": self.temperature if self.temperature > 0 else 1.0,
            "pad_token_id": self.tokenizer.eos_token_id,
        }

        # Add top_p and top_k only if temperature > 0
        if self.temperature > 0:
            gen_kwargs.update(top_p=self.top_p, top_k=self.topk)

        gen_config = self.GenerationConfig(**gen_kwargs)

        with self.torch.no_grad():
            out = self.model.generate(**inputs, generation_config=gen_config)

        gen = out[0, inputs.input_ids.shape[-1] :]
        return self.tokenizer.decode(gen, skip_special_tokens=True)

    async def generate_topk_per_token(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        full = self._build_inputs(text, history)
        inputs = self.tokenizer(full, return_tensors="pt").to(self.model.device)

        with self.torch.no_grad():
            out = self.model.generate(
                **inputs,
                max_new_tokens=1,
                do_sample=False,
                temperature=1.0,
                return_dict_in_generate=True,
                output_scores=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        scores = out.scores[0][0]  # (vocab,)
        probs = self.torch.softmax(scores, dim=-1)
        top_probs, top_idx = self.torch.topk(probs, k=self.topk)

        tokens = []
        for p, idx in zip(top_probs.cpu().numpy(), top_idx.cpu().numpy()):
            tokens.append(Token(self.tokenizer.decode([idx]), float(p)))
        return tokens

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        full = self._build_inputs(text, history)
        ids = self.tokenizer.encode(full)
        logprobs = []

        for i in range(1, len(ids) + 1):
            trunc = ids[: i - 1] + ids[i:] if i < len(ids) else ids[:-1]
            inputs = self.torch.tensor([trunc]).to(self.model.device)

            with self.torch.no_grad():
                logits = self.model(inputs).logits[0, -1, :]
                probs = self.torch.softmax(logits, dim=-1)

            true_id = ids[i - 1]
            logprobs.append(
                Token(
                    self.tokenizer.decode([true_id]),
                    float(probs[true_id].cpu()),
                )
            )
        return logprobs

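Note: a local-inference sketch; the model id is illustrative and device placement is left to `device_map="auto"`. The tokenizer kwarg mirrors what `LLMFactory` injects (the wrapper replaces it internally with the HuggingFace tokenizer). `Token` carries `text` and `prob`, which is what `generate_topk_per_token` returns for the first generated token.

import asyncio

from graphgen.models import Tokenizer
from graphgen.models.llm.local.hf_wrapper import HuggingFaceWrapper


async def main() -> None:
    llm = HuggingFaceWrapper(
        model="Qwen/Qwen2.5-0.5B-Instruct",   # illustrative local model id
        temperature=0.0,
        tokenizer=Tokenizer("cl100k_base"),
    )

    print(await llm.generate_answer("2 + 2 ="))

    # Top-k candidates for the first generated token as (text, prob) pairs.
    for tok in await llm.generate_topk_per_token("2 + 2 ="):
        print(repr(tok.text), round(tok.prob, 4))


asyncio.run(main())
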
graphgen/models/llm/local/sglang_wrapper.py
ADDED
@@ -0,0 +1,148 @@
import math
from typing import Any, Dict, List, Optional

from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
from graphgen.bases.datatypes import Token


class SGLangWrapper(BaseLLMWrapper):
    """
    Async inference backend based on SGLang offline engine.
    """

    def __init__(
        self,
        model: str,
        temperature: float = 0.0,
        top_p: float = 1.0,
        topk: int = 5,
        **kwargs: Any,
    ):
        super().__init__(temperature=temperature, top_p=top_p, **kwargs)
        try:
            import sglang as sgl
            from sglang.utils import async_stream_and_merge, stream_and_merge
        except ImportError as exc:
            raise ImportError(
                "SGLangWrapper requires sglang. Install it with: "
                "uv pip install sglang --prerelease=allow"
            ) from exc

        self.model_path: str = model
        self.temperature = temperature
        self.top_p = top_p
        self.topk = topk

        # Initialise the offline engine
        self.engine = sgl.Engine(model_path=self.model_path)

        # Keep helpers for streaming
        self.async_stream_and_merge = async_stream_and_merge
        self.stream_and_merge = stream_and_merge

    @staticmethod
    def _build_sampling_params(
        temperature: float,
        top_p: float,
        max_tokens: int,
        topk: int,
        logprobs: bool = False,
    ) -> Dict[str, Any]:
        """Build SGLang-compatible sampling-params dict."""
        params = {
            "temperature": temperature,
            "top_p": top_p,
            "max_new_tokens": max_tokens,
        }
        if logprobs and topk > 0:
            params["logprobs"] = topk
        return params

    def _prep_prompt(self, text: str, history: Optional[List[dict]] = None) -> str:
        """Convert raw text (+ optional history) into a single prompt string."""
        parts = []
        if self.system_prompt:
            parts.append(self.system_prompt)
        if history:
            assert len(history) % 2 == 0, "History must have even length (u/a turns)."
            parts.extend([item["content"] for item in history])
        parts.append(text)
        return "\n".join(parts)

    def _tokens_from_output(self, output: Dict[str, Any]) -> List[Token]:
        tokens: List[Token] = []

        meta = output.get("meta_info", {})
        logprobs = meta.get("output_token_logprobs", [])
        topks = meta.get("output_top_logprobs", [])

        tokenizer = self.engine.tokenizer_manager.tokenizer

        for idx, (lp, tid, _) in enumerate(logprobs):
            prob = math.exp(lp)
            tok_str = tokenizer.decode([tid])

            top_candidates = []
            if self.topk > 0 and idx < len(topks):
                for t_lp, t_tid, _ in topks[idx][: self.topk]:
                    top_candidates.append(
                        Token(text=tokenizer.decode([t_tid]), prob=math.exp(t_lp))
                    )

            tokens.append(Token(text=tok_str, prob=prob, top_candidates=top_candidates))

        return tokens

    async def generate_answer(
        self,
        text: str,
        history: Optional[List[str]] = None,
        **extra: Any,
    ) -> str:
        prompt = self._prep_prompt(text, history)
        sampling_params = self._build_sampling_params(
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=self.max_tokens,
            topk=0,  # no logprobs needed for simple generation
        )

        outputs = await self.engine.async_generate([prompt], sampling_params)
        return self.filter_think_tags(outputs[0]["text"])

    async def generate_topk_per_token(
        self,
        text: str,
        history: Optional[List[str]] = None,
        **extra: Any,
    ) -> List[Token]:
        prompt = self._prep_prompt(text, history)
        sampling_params = self._build_sampling_params(
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=1,  # keep short for token-level analysis
            topk=self.topk,
        )

        outputs = await self.engine.async_generate(
            [prompt], sampling_params, return_logprob=True, top_logprobs_num=5
        )
        print(outputs)
        return self._tokens_from_output(outputs[0])

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        raise NotImplementedError(
            "SGLangWrapper does not support per-token logprobs yet."
        )

    def shutdown(self) -> None:
        """Gracefully shutdown the SGLang engine."""
        if hasattr(self, "engine"):
            self.engine.shutdown()

    def restart(self) -> None:
        """Restart the SGLang engine."""
        self.shutdown()
        self.engine = self.engine.__class__(model_path=self.model_path)

graphgen/models/llm/local/tgi_wrapper.py
ADDED
@@ -0,0 +1,36 @@
from typing import Any, List, Optional

from graphgen.bases import BaseLLMWrapper
from graphgen.bases.datatypes import Token


# TODO: implement TGIWrapper methods
class TGIWrapper(BaseLLMWrapper):
    """
    Async inference backend based on TGI (Text-Generation-Inference)
    """

    def __init__(
        self,
        model_url: str,  # e.g. "http://localhost:8080"
        temperature: float = 0.0,
        top_p: float = 1.0,
        topk: int = 5,
        **kwargs: Any
    ):
        super().__init__(temperature=temperature, top_p=top_p, **kwargs)

    async def generate_answer(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> str:
        pass

    async def generate_topk_per_token(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        pass

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        pass

graphgen/models/llm/{ollama_client.py → local/trt_wrapper.py}
RENAMED
@@ -1,10 +1,15 @@
-# TODO: implement ollama client
 from typing import Any, List, Optional
 
-from graphgen.bases import …
+from graphgen.bases import BaseLLMWrapper
+from graphgen.bases.datatypes import Token
 
 
-…
+# TODO: implement TensorRTWrapper methods
+class TensorRTWrapper(BaseLLMWrapper):
+    """
+    Async inference backend based on TensorRT-LLM
+    """
+
     async def generate_answer(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
     ) -> str:

graphgen/models/llm/local/vllm_wrapper.py
ADDED
@@ -0,0 +1,137 @@
from typing import Any, List, Optional

from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
from graphgen.bases.datatypes import Token


class VLLMWrapper(BaseLLMWrapper):
    """
    Async inference backend based on vLLM (https://github.com/vllm-project/vllm)
    """

    def __init__(
        self,
        model: str,
        tensor_parallel_size: int = 1,
        gpu_memory_utilization: float = 0.9,
        temperature: float = 0.0,
        top_p: float = 1.0,
        topk: int = 5,
        **kwargs: Any,
    ):
        super().__init__(temperature=temperature, top_p=top_p, **kwargs)

        try:
            from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams
        except ImportError as exc:
            raise ImportError(
                "VLLMWrapper requires vllm. Install it with: uv pip install vllm --torch-backend=auto"
            ) from exc

        self.SamplingParams = SamplingParams

        engine_args = AsyncEngineArgs(
            model=model,
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=gpu_memory_utilization,
            trust_remote_code=kwargs.get("trust_remote_code", True),
        )
        self.engine = AsyncLLMEngine.from_engine_args(engine_args)

        self.temperature = temperature
        self.top_p = top_p
        self.topk = topk

    @staticmethod
    def _build_inputs(prompt: str, history: Optional[List[str]] = None) -> str:
        msgs = history or []
        lines = []
        for m in msgs:
            if isinstance(m, dict):
                role = m.get("role", "")
                content = m.get("content", "")
                lines.append(f"{role}: {content}")
            else:
                lines.append(str(m))
        lines.append(prompt)
        return "\n".join(lines)

    async def generate_answer(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> str:
        full_prompt = self._build_inputs(text, history)

        sp = self.SamplingParams(
            temperature=self.temperature if self.temperature > 0 else 1.0,
            top_p=self.top_p if self.temperature > 0 else 1.0,
            max_tokens=extra.get("max_new_tokens", 512),
        )

        results = []
        async for req_output in self.engine.generate(
            full_prompt, sp, request_id="graphgen_req"
        ):
            results = req_output.outputs
        return results[-1].text

    async def generate_topk_per_token(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        full_prompt = self._build_inputs(text, history)

        sp = self.SamplingParams(
            temperature=0,
            max_tokens=1,
            logprobs=self.topk,
        )

        results = []
        async for req_output in self.engine.generate(
            full_prompt, sp, request_id="graphgen_topk"
        ):
            results = req_output.outputs
        top_logprobs = results[-1].logprobs[0]

        tokens = []
        for _, logprob_obj in top_logprobs.items():
            tok_str = logprob_obj.decoded_token
            prob = float(logprob_obj.logprob.exp())
            tokens.append(Token(tok_str, prob))
        tokens.sort(key=lambda x: -x.prob)
        return tokens

    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None, **extra: Any
    ) -> List[Token]:
        full_prompt = self._build_inputs(text, history)

        # vLLM has no ready-made "mask one token and recompute its prob" API,
        # so we take the most direct route: send the prompt in one pass with
        # prompt_logprobs enabled, let vLLM return a logprob for every position
        # of the *input* part, then pick out the probability of each actual token.
        sp = self.SamplingParams(
            temperature=0,
            max_tokens=0,  # do not generate new tokens
            prompt_logprobs=1,  # top-1 is enough
        )

        results = []
        async for req_output in self.engine.generate(
            full_prompt, sp, request_id="graphgen_prob"
        ):
            results = req_output.outputs

        # prompt_logprobs is a list whose length equals the number of prompt tokens;
        # each element is a dict{token_id: logprob_obj} or None (the first position is None)
        prompt_logprobs = results[-1].prompt_logprobs

        tokens = []
        for _, logprob_dict in enumerate(prompt_logprobs):
            if logprob_dict is None:
                continue
            # each dict holds exactly one kv pair here, because we asked for top-1
            _, logprob_obj = next(iter(logprob_dict.items()))
            tok_str = logprob_obj.decoded_token
            prob = float(logprob_obj.logprob.exp())
            tokens.append(Token(tok_str, prob))
        return tokens

graphgen/models/llm/topk_token_model.py
DELETED
@@ -1,53 +0,0 @@
from abc import ABC, abstractmethod
from typing import List, Optional

from graphgen.bases import Token


class TopkTokenModel(ABC):
    def __init__(
        self,
        do_sample: bool = False,
        temperature: float = 0,
        max_tokens: int = 4096,
        repetition_penalty: float = 1.05,
        num_beams: int = 1,
        topk: int = 50,
        topp: float = 0.95,
        topk_per_token: int = 5,
    ):
        self.do_sample = do_sample
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.repetition_penalty = repetition_penalty
        self.num_beams = num_beams
        self.topk = topk
        self.topp = topp
        self.topk_per_token = topk_per_token

    @abstractmethod
    async def generate_topk_per_token(self, text: str) -> List[Token]:
        """
        Generate prob, text and candidates for each token of the model's output.
        This function is used to visualize the inference process.
        """
        raise NotImplementedError

    @abstractmethod
    async def generate_inputs_prob(
        self, text: str, history: Optional[List[str]] = None
    ) -> List[Token]:
        """
        Generate prob and text for each token of the input text.
        This function is used to visualize the ppl.
        """
        raise NotImplementedError

    @abstractmethod
    async def generate_answer(
        self, text: str, history: Optional[List[str]] = None
    ) -> str:
        """
        Generate answer from the model.
        """
        raise NotImplementedError

graphgen/operators/__init__.py
CHANGED
@@ -1,5 +1,6 @@
 from .build_kg import build_mm_kg, build_text_kg
 from .generate import generate_qas
+from .init import init_llm
 from .judge import judge_statement
 from .partition import partition_kg
 from .quiz import quiz

graphgen/operators/build_kg/build_mm_kg.py
CHANGED
@@ -3,14 +3,15 @@ from typing import List
 
 import gradio as gr
 
+from graphgen.bases import BaseLLMWrapper
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Chunk
-from graphgen.models import MMKGBuilder
+from graphgen.models import MMKGBuilder
 from graphgen.utils import run_concurrent
 
 
 async def build_mm_kg(
-    llm_client: …,
+    llm_client: BaseLLMWrapper,
     kg_instance: BaseGraphStorage,
     chunks: List[Chunk],
     progress_bar: gr.Progress = None,

graphgen/operators/build_kg/build_text_kg.py
CHANGED
@@ -3,14 +3,15 @@ from typing import List
 
 import gradio as gr
 
+from graphgen.bases import BaseLLMWrapper
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Chunk
-from graphgen.models import LightRAGKGBuilder
+from graphgen.models import LightRAGKGBuilder
 from graphgen.utils import run_concurrent
 
 
 async def build_text_kg(
-    llm_client: …,
+    llm_client: BaseLLMWrapper,
     kg_instance: BaseGraphStorage,
     chunks: List[Chunk],
     progress_bar: gr.Progress = None,

graphgen/operators/generate/generate_qas.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import Any
 
-from graphgen.bases import …
+from graphgen.bases import BaseLLMWrapper
 from graphgen.models import (
     AggregatedGenerator,
     AtomicGenerator,
@@ -12,7 +12,7 @@ from graphgen.utils import logger, run_concurrent
 
 
 async def generate_qas(
-    llm_client: …,
+    llm_client: BaseLLMWrapper,
     batches: list[
         tuple[
             list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]

graphgen/operators/init/__init__.py
ADDED
@@ -0,0 +1 @@
from .init_llm import init_llm

graphgen/operators/init/init_llm.py
ADDED
@@ -0,0 +1,84 @@
import os
from typing import Any, Dict, Optional

from graphgen.bases import BaseLLMWrapper
from graphgen.models import Tokenizer


class LLMFactory:
    """
    A factory class to create LLM wrapper instances based on the specified backend.
    Supported backends include:
    - http_api: HTTPClient
    - openai_api: OpenAIClient
    - ollama_api: OllamaClient
    - ollama: OllamaWrapper
    - deepspeed: DeepSpeedWrapper
    - huggingface: HuggingFaceWrapper
    - tgi: TGIWrapper
    - sglang: SGLangWrapper
    - tensorrt: TensorRTWrapper
    """

    @staticmethod
    def create_llm_wrapper(backend: str, config: Dict[str, Any]) -> BaseLLMWrapper:
        # add tokenizer
        tokenizer: Tokenizer = Tokenizer(
            os.environ.get("TOKENIZER_MODEL", "cl100k_base"),
        )
        config["tokenizer"] = tokenizer
        if backend == "http_api":
            from graphgen.models.llm.api.http_client import HTTPClient

            return HTTPClient(**config)
        if backend == "openai_api":
            from graphgen.models.llm.api.openai_client import OpenAIClient

            return OpenAIClient(**config)
        if backend == "ollama_api":
            from graphgen.models.llm.api.ollama_client import OllamaClient

            return OllamaClient(**config)
        if backend == "huggingface":
            from graphgen.models.llm.local.hf_wrapper import HuggingFaceWrapper

            return HuggingFaceWrapper(**config)
        # if backend == "sglang":
        #     from graphgen.models.llm.local.sglang_wrapper import SGLangWrapper
        #
        #     return SGLangWrapper(**config)

        if backend == "vllm":
            from graphgen.models.llm.local.vllm_wrapper import VLLMWrapper

            return VLLMWrapper(**config)

        raise NotImplementedError(f"Backend {backend} is not implemented yet.")


def _load_env_group(prefix: str) -> Dict[str, Any]:
    """
    Collect environment variables with the given prefix into a dictionary,
    stripping the prefix from the keys.
    """
    return {
        k[len(prefix) :].lower(): v
        for k, v in os.environ.items()
        if k.startswith(prefix)
    }


def init_llm(model_type: str) -> Optional[BaseLLMWrapper]:
    if model_type == "synthesizer":
        prefix = "SYNTHESIZER_"
    elif model_type == "trainee":
        prefix = "TRAINEE_"
    else:
        raise NotImplementedError(f"Model type {model_type} is not implemented yet.")
    config = _load_env_group(prefix)
    # if config is empty, return None
    if not config:
        return None
    backend = config.pop("backend")
    llm_wrapper = LLMFactory.create_llm_wrapper(backend, config)
    return llm_wrapper

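Note: `_load_env_group` strips the `SYNTHESIZER_` / `TRAINEE_` prefix and lower-cases the remainder, so apart from `*_BACKEND` (which picks the wrapper and is popped) every variable becomes a constructor kwarg for the chosen backend. A sketch of the resulting layout; the values are placeholders and the valid keys are simply whatever the selected wrapper accepts:

import os

from graphgen.operators import init_llm

# Synthesizer over an OpenAI-compatible HTTP endpoint -> HTTPClient(**config)
os.environ["SYNTHESIZER_BACKEND"] = "http_api"
os.environ["SYNTHESIZER_MODEL"] = "qwen2.5-7b-instruct"         # becomes config["model"]
os.environ["SYNTHESIZER_BASE_URL"] = "http://localhost:8080/v1"
os.environ["SYNTHESIZER_API_KEY"] = "EMPTY"

# Trainee served locally through vLLM -> VLLMWrapper(**config)
os.environ["TRAINEE_BACKEND"] = "vllm"
os.environ["TRAINEE_MODEL"] = "Qwen/Qwen2.5-0.5B-Instruct"

synthesizer = init_llm("synthesizer")   # HTTPClient
trainee = init_llm("trainee")           # VLLMWrapper; None if no TRAINEE_* vars are set
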
graphgen/operators/judge.py
CHANGED
@@ -3,13 +3,14 @@ import math
 
 from tqdm.asyncio import tqdm as tqdm_async
 
-from graphgen.…
+from graphgen.bases import BaseLLMWrapper
+from graphgen.models import JsonKVStorage, NetworkXStorage
 from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
 from graphgen.utils import logger, yes_no_loss_entropy
 
 
 async def judge_statement(  # pylint: disable=too-many-statements
-    trainee_llm_client: …,
+    trainee_llm_client: BaseLLMWrapper,
     graph_storage: NetworkXStorage,
     rephrase_storage: JsonKVStorage,
     re_judge: bool = False,

graphgen/operators/quiz.py
CHANGED
@@ -3,13 +3,14 @@ from collections import defaultdict
 
 from tqdm.asyncio import tqdm as tqdm_async
 
-from graphgen.…
+from graphgen.bases import BaseLLMWrapper
+from graphgen.models import JsonKVStorage, NetworkXStorage
 from graphgen.templates import DESCRIPTION_REPHRASING_PROMPT
 from graphgen.utils import detect_main_language, logger
 
 
 async def quiz(
-    synth_llm_client: …,
+    synth_llm_client: BaseLLMWrapper,
     graph_storage: NetworkXStorage,
     rephrase_storage: JsonKVStorage,
     max_samples: int = 1,

requirements.txt
CHANGED
@@ -25,4 +25,4 @@ igraph
 python-louvain
 
 # For visualization
-matplotlib
+matplotlib