Spaces:

chenzihong
/

GraphGen

Running

App Files Community

github-actions[bot] committed on Oct 23

Commit

8c66169

1 Parent(s): 0b9d8c7

Auto-sync from demo at Thu Oct 23 12:37:24 UTC 2025

Browse files

Files changed (29) hide show

graphgen/bases/base_generator.py +2 -3
graphgen/bases/base_kg_builder.py +4 -8
graphgen/bases/base_partitioner.py +0 -2
graphgen/bases/base_splitter.py +15 -8
graphgen/bases/base_storage.py +0 -3
graphgen/bases/base_tokenizer.py +2 -3
graphgen/models/evaluator/base_evaluator.py +3 -4
graphgen/models/evaluator/length_evaluator.py +3 -6
graphgen/models/evaluator/mtld_evaluator.py +4 -8
graphgen/models/generator/aggregated_generator.py +0 -2
graphgen/models/generator/atomic_generator.py +0 -2
graphgen/models/generator/cot_generator.py +0 -2
graphgen/models/generator/multi_hop_generator.py +0 -2
graphgen/models/generator/vqa_generator.py +0 -2
graphgen/models/kg_builder/light_rag_kg_builder.py +3 -4
graphgen/models/kg_builder/mm_kg_builder.py +1 -3
graphgen/models/llm/topk_token_model.py +24 -12
graphgen/models/partitioner/bfs_partitioner.py +0 -2
graphgen/models/partitioner/dfs_partitioner.py +0 -2
graphgen/models/partitioner/ece_partitioner.py +0 -2
graphgen/models/partitioner/leiden_partitioner.py +0 -2
graphgen/models/search/db/uniprot_search.py +0 -3
graphgen/models/search/kg/wiki_search.py +0 -2
graphgen/models/search/web/bing_search.py +2 -4
graphgen/models/search/web/google_search.py +0 -3
graphgen/models/storage/json_storage.py +2 -0
graphgen/models/tokenizer/__init__.py +2 -6
graphgen/models/tokenizer/hf_tokenizer.py +2 -3
graphgen/models/tokenizer/tiktoken_tokenizer.py +2 -3

graphgen/bases/base_generator.py CHANGED Viewed

@@ -1,17 +1,16 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Any
 from graphgen.bases.base_llm_client import BaseLLMClient
-@dataclass
 class BaseGenerator(ABC):
     """
     Generate QAs based on given prompts.
     """
-    llm_client: BaseLLMClient
     @staticmethod
     @abstractmethod

 from abc import ABC, abstractmethod
 from typing import Any
 from graphgen.bases.base_llm_client import BaseLLMClient
 class BaseGenerator(ABC):
     """
     Generate QAs based on given prompts.
     """
+    def __init__(self, llm_client: BaseLLMClient):
+        self.llm_client = llm_client
     @staticmethod
     @abstractmethod

graphgen/bases/base_kg_builder.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from collections import defaultdict
-from dataclasses import dataclass, field
 from typing import Dict, List, Tuple
 from graphgen.bases.base_llm_client import BaseLLMClient
@@ -8,14 +7,11 @@ from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Chunk
-@dataclass
 class BaseKGBuilder(ABC):
-    llm_client: BaseLLMClient
-    _nodes: Dict[str, List[dict]] = field(default_factory=lambda: defaultdict(list))
-    _edges: Dict[Tuple[str, str], List[dict]] = field(
-        default_factory=lambda: defaultdict(list)
-    )
     @abstractmethod
     async def extract(

 from abc import ABC, abstractmethod
 from collections import defaultdict
 from typing import Dict, List, Tuple
 from graphgen.bases.base_llm_client import BaseLLMClient
 from graphgen.bases.datatypes import Chunk
 class BaseKGBuilder(ABC):
+    def __init__(self, llm_client: BaseLLMClient):
+        self.llm_client = llm_client
+        self._nodes: Dict[str, List[dict]] = defaultdict(list)
+        self._edges: Dict[Tuple[str, str], List[dict]] = defaultdict(list)
     @abstractmethod
     async def extract(

graphgen/bases/base_partitioner.py CHANGED Viewed

@@ -1,12 +1,10 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Any, List
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Community
-@dataclass
 class BasePartitioner(ABC):
     @abstractmethod
     async def partition(

 from abc import ABC, abstractmethod
 from typing import Any, List
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Community
 class BasePartitioner(ABC):
     @abstractmethod
     async def partition(

graphgen/bases/base_splitter.py CHANGED Viewed

@@ -1,25 +1,32 @@
 import copy
 import re
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Callable, Iterable, List, Literal, Optional, Union
 from graphgen.bases.datatypes import Chunk
 from graphgen.utils import logger
-@dataclass
 class BaseSplitter(ABC):
     """
     Abstract base class for splitting text into smaller chunks.
     """
-    chunk_size: int = 1024
-    chunk_overlap: int = 100
-    length_function: Callable[[str], int] = len
-    keep_separator: bool = False
-    add_start_index: bool = False
-    strip_whitespace: bool = True
     @abstractmethod
     def split_text(self, text: str) -> List[str]:

 import copy
 import re
 from abc import ABC, abstractmethod
 from typing import Callable, Iterable, List, Literal, Optional, Union
 from graphgen.bases.datatypes import Chunk
 from graphgen.utils import logger
 class BaseSplitter(ABC):
     """
     Abstract base class for splitting text into smaller chunks.
     """
+    def __init__(
+        self,
+        chunk_size: int = 1024,
+        chunk_overlap: int = 100,
+        length_function: Callable[[str], int] = len,
+        keep_separator: bool = False,
+        add_start_index: bool = False,
+        strip_whitespace: bool = True,
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.length_function = length_function
+        self.keep_separator = keep_separator
+        self.add_start_index = add_start_index
+        self.strip_whitespace = strip_whitespace
     @abstractmethod
     def split_text(self, text: str) -> List[str]:

graphgen/bases/base_storage.py CHANGED Viewed

@@ -16,7 +16,6 @@ class StorageNameSpace:
         """commit the storage operations after querying"""
-@dataclass
 class BaseListStorage(Generic[T], StorageNameSpace):
     async def all_items(self) -> list[T]:
         raise NotImplementedError
@@ -34,7 +33,6 @@ class BaseListStorage(Generic[T], StorageNameSpace):
         raise NotImplementedError
-@dataclass
 class BaseKVStorage(Generic[T], StorageNameSpace):
     async def all_keys(self) -> list[str]:
         raise NotImplementedError
@@ -58,7 +56,6 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
         raise NotImplementedError
-@dataclass
 class BaseGraphStorage(StorageNameSpace):
     async def has_node(self, node_id: str) -> bool:
         raise NotImplementedError

         """commit the storage operations after querying"""
 class BaseListStorage(Generic[T], StorageNameSpace):
     async def all_items(self) -> list[T]:
         raise NotImplementedError
         raise NotImplementedError
 class BaseKVStorage(Generic[T], StorageNameSpace):
     async def all_keys(self) -> list[str]:
         raise NotImplementedError
         raise NotImplementedError
 class BaseGraphStorage(StorageNameSpace):
     async def has_node(self, node_id: str) -> bool:
         raise NotImplementedError

graphgen/bases/base_tokenizer.py CHANGED Viewed

@@ -1,13 +1,12 @@
 from __future__ import annotations
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import List
-@dataclass
 class BaseTokenizer(ABC):
-    model_name: str = "cl100k_base"
     @abstractmethod
     def encode(self, text: str) -> List[int]:

 from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import List
 class BaseTokenizer(ABC):
+    def __init__(self, model_name: str = "cl100k_base"):
+        self.model_name = model_name
     @abstractmethod
     def encode(self, text: str) -> List[int]:

graphgen/models/evaluator/base_evaluator.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import asyncio
-from dataclasses import dataclass
 from tqdm.asyncio import tqdm as tqdm_async
@@ -7,10 +6,10 @@ from graphgen.bases.datatypes import QAPair
 from graphgen.utils import create_event_loop
-@dataclass
 class BaseEvaluator:
-    max_concurrent: int = 100
-    results: list[float] = None
     def evaluate(self, pairs: list[QAPair]) -> list[float]:
         """

 import asyncio
 from tqdm.asyncio import tqdm as tqdm_async
 from graphgen.utils import create_event_loop
 class BaseEvaluator:
+    def __init__(self, max_concurrent: int = 100):
+        self.max_concurrent = max_concurrent
+        self.results: list[float] = None
     def evaluate(self, pairs: list[QAPair]) -> list[float]:
         """

graphgen/models/evaluator/length_evaluator.py CHANGED Viewed

@@ -1,16 +1,13 @@
-from dataclasses import dataclass
 from graphgen.bases.datatypes import QAPair
 from graphgen.models.evaluator.base_evaluator import BaseEvaluator
 from graphgen.models.tokenizer import Tokenizer
 from graphgen.utils import create_event_loop
-@dataclass
 class LengthEvaluator(BaseEvaluator):
-    tokenizer_name: str = "cl100k_base"
-    def __post_init__(self):
         self.tokenizer = Tokenizer(model_name=self.tokenizer_name)
     async def evaluate_single(self, pair: QAPair) -> float:

 from graphgen.bases.datatypes import QAPair
 from graphgen.models.evaluator.base_evaluator import BaseEvaluator
 from graphgen.models.tokenizer import Tokenizer
 from graphgen.utils import create_event_loop
 class LengthEvaluator(BaseEvaluator):
+    def __init__(self, tokenizer_name: str = "cl100k_base", max_concurrent: int = 100):
+        super().__init__(max_concurrent)
+        self.tokenizer_name = tokenizer_name
         self.tokenizer = Tokenizer(model_name=self.tokenizer_name)
     async def evaluate_single(self, pair: QAPair) -> float:

graphgen/models/evaluator/mtld_evaluator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass, field
 from typing import Set
 from graphgen.bases.datatypes import QAPair
@@ -8,18 +7,15 @@ from graphgen.utils import NLTKHelper, create_event_loop, detect_main_language
 nltk_helper = NLTKHelper()
-@dataclass
 class MTLDEvaluator(BaseEvaluator):
     """
     衡量文本词汇多样性的指标
     """
-    stopwords_en: Set[str] = field(
-        default_factory=lambda: set(nltk_helper.get_stopwords("english"))
-    )
-    stopwords_zh: Set[str] = field(
-        default_factory=lambda: set(nltk_helper.get_stopwords("chinese"))
-    )
     async def evaluate_single(self, pair: QAPair) -> float:
         loop = create_event_loop()

 from typing import Set
 from graphgen.bases.datatypes import QAPair
 nltk_helper = NLTKHelper()
 class MTLDEvaluator(BaseEvaluator):
     """
     衡量文本词汇多样性的指标
     """
+    def __init__(self, max_concurrent: int = 100):
+        super().__init__(max_concurrent)
+        self.stopwords_en: Set[str] = set(nltk_helper.get_stopwords("english"))
+        self.stopwords_zh: Set[str] = set(nltk_helper.get_stopwords("chinese"))
     async def evaluate_single(self, pair: QAPair) -> float:
         loop = create_event_loop()

graphgen/models/generator/aggregated_generator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import Any
 from graphgen.bases import BaseGenerator
@@ -6,7 +5,6 @@ from graphgen.templates import AGGREGATED_GENERATION_PROMPT
 from graphgen.utils import compute_content_hash, detect_main_language, logger
-@dataclass
 class AggregatedGenerator(BaseGenerator):
     """
     Aggregated Generator follows a TWO-STEP process:

 from typing import Any
 from graphgen.bases import BaseGenerator
 from graphgen.utils import compute_content_hash, detect_main_language, logger
 class AggregatedGenerator(BaseGenerator):
     """
     Aggregated Generator follows a TWO-STEP process:

graphgen/models/generator/atomic_generator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import Any
 from graphgen.bases import BaseGenerator
@@ -6,7 +5,6 @@ from graphgen.templates import ATOMIC_GENERATION_PROMPT
 from graphgen.utils import compute_content_hash, detect_main_language, logger
-@dataclass
 class AtomicGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

 from typing import Any
 from graphgen.bases import BaseGenerator
 from graphgen.utils import compute_content_hash, detect_main_language, logger
 class AtomicGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

graphgen/models/generator/cot_generator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import Any
 from graphgen.bases import BaseGenerator
@@ -6,7 +5,6 @@ from graphgen.templates import COT_GENERATION_PROMPT
 from graphgen.utils import compute_content_hash, detect_main_language, logger
-@dataclass
 class CoTGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

 from typing import Any
 from graphgen.bases import BaseGenerator
 from graphgen.utils import compute_content_hash, detect_main_language, logger
 class CoTGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

graphgen/models/generator/multi_hop_generator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import Any
 from graphgen.bases import BaseGenerator
@@ -6,7 +5,6 @@ from graphgen.templates import MULTI_HOP_GENERATION_PROMPT
 from graphgen.utils import compute_content_hash, detect_main_language, logger
-@dataclass
 class MultiHopGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

 from typing import Any
 from graphgen.bases import BaseGenerator
 from graphgen.utils import compute_content_hash, detect_main_language, logger
 class MultiHopGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

graphgen/models/generator/vqa_generator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import Any
 from graphgen.bases import BaseGenerator
@@ -6,7 +5,6 @@ from graphgen.templates import VQA_GENERATION_PROMPT
 from graphgen.utils import compute_content_hash, detect_main_language, logger
-@dataclass
 class VQAGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

 from typing import Any
 from graphgen.bases import BaseGenerator
 from graphgen.utils import compute_content_hash, detect_main_language, logger
 class VQAGenerator(BaseGenerator):
     @staticmethod
     def build_prompt(

graphgen/models/kg_builder/light_rag_kg_builder.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import re
 from collections import Counter, defaultdict
-from dataclasses import dataclass
 from typing import Dict, List, Tuple
 from graphgen.bases import BaseGraphStorage, BaseKGBuilder, BaseLLMClient, Chunk
@@ -15,10 +14,10 @@ from graphgen.utils import (
 )
-@dataclass
 class LightRAGKGBuilder(BaseKGBuilder):
-    llm_client: BaseLLMClient = None
-    max_loop: int = 3
     async def extract(
         self, chunk: Chunk

 import re
 from collections import Counter, defaultdict
 from typing import Dict, List, Tuple
 from graphgen.bases import BaseGraphStorage, BaseKGBuilder, BaseLLMClient, Chunk
 )
 class LightRAGKGBuilder(BaseKGBuilder):
+    def __init__(self, llm_client: BaseLLMClient, max_loop: int = 3):
+        super().__init__(llm_client)
+        self.max_loop = max_loop
     async def extract(
         self, chunk: Chunk

graphgen/models/kg_builder/mm_kg_builder.py CHANGED Viewed

@@ -2,7 +2,7 @@ import re
 from collections import defaultdict
 from typing import Dict, List, Tuple
-from graphgen.bases import BaseLLMClient, Chunk
 from graphgen.templates import MMKG_EXTRACTION_PROMPT
 from graphgen.utils import (
     detect_main_language,
@@ -16,8 +16,6 @@ from .light_rag_kg_builder import LightRAGKGBuilder
 class MMKGBuilder(LightRAGKGBuilder):
-    llm_client: BaseLLMClient = None
     async def extract(
         self, chunk: Chunk
     ) -> Tuple[Dict[str, List[dict]], Dict[Tuple[str, str], List[dict]]]:

 from collections import defaultdict
 from typing import Dict, List, Tuple
+from graphgen.bases import Chunk
 from graphgen.templates import MMKG_EXTRACTION_PROMPT
 from graphgen.utils import (
     detect_main_language,
 class MMKGBuilder(LightRAGKGBuilder):
     async def extract(
         self, chunk: Chunk
     ) -> Tuple[Dict[str, List[dict]], Dict[Tuple[str, str], List[dict]]]:

graphgen/models/llm/topk_token_model.py CHANGED Viewed

@@ -1,21 +1,31 @@
-from dataclasses import dataclass
 from typing import List, Optional
 from graphgen.bases import Token
-@dataclass
-class TopkTokenModel:
-    do_sample: bool = False
-    temperature: float = 0
-    max_tokens: int = 4096
-    repetition_penalty: float = 1.05
-    num_beams: int = 1
-    topk: int = 50
-    topp: float = 0.95
-    topk_per_token: int = 5  # number of topk tokens to generate for each token
     async def generate_topk_per_token(self, text: str) -> List[Token]:
         """
         Generate prob, text and candidates for each token of the model's output.
@@ -23,6 +33,7 @@ class TopkTokenModel:
         """
         raise NotImplementedError
     async def generate_inputs_prob(
         self, text: str, history: Optional[List[str]] = None
     ) -> List[Token]:
@@ -32,6 +43,7 @@ class TopkTokenModel:
         """
         raise NotImplementedError
     async def generate_answer(
         self, text: str, history: Optional[List[str]] = None
     ) -> str:

+from abc import ABC, abstractmethod
 from typing import List, Optional
 from graphgen.bases import Token
+class TopkTokenModel(ABC):
+    def __init__(
+        self,
+        do_sample: bool = False,
+        temperature: float = 0,
+        max_tokens: int = 4096,
+        repetition_penalty: float = 1.05,
+        num_beams: int = 1,
+        topk: int = 50,
+        topp: float = 0.95,
+        topk_per_token: int = 5,
+    ):
+        self.do_sample = do_sample
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.repetition_penalty = repetition_penalty
+        self.num_beams = num_beams
+        self.topk = topk
+        self.topp = topp
+        self.topk_per_token = topk_per_token
+    @abstractmethod
     async def generate_topk_per_token(self, text: str) -> List[Token]:
         """
         Generate prob, text and candidates for each token of the model's output.
         """
         raise NotImplementedError
+    @abstractmethod
     async def generate_inputs_prob(
         self, text: str, history: Optional[List[str]] = None
     ) -> List[Token]:
         """
         raise NotImplementedError
+    @abstractmethod
     async def generate_answer(
         self, text: str, history: Optional[List[str]] = None
     ) -> str:

graphgen/models/partitioner/bfs_partitioner.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import random
 from collections import deque
-from dataclasses import dataclass
 from typing import Any, List
 from graphgen.bases import BaseGraphStorage, BasePartitioner
@@ -10,7 +9,6 @@ NODE_UNIT: str = "n"
 EDGE_UNIT: str = "e"
-@dataclass
 class BFSPartitioner(BasePartitioner):
     """
     BFS partitioner that partitions the graph into communities of a fixed size.

 import random
 from collections import deque
 from typing import Any, List
 from graphgen.bases import BaseGraphStorage, BasePartitioner
 EDGE_UNIT: str = "e"
 class BFSPartitioner(BasePartitioner):
     """
     BFS partitioner that partitions the graph into communities of a fixed size.

graphgen/models/partitioner/dfs_partitioner.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import random
-from dataclasses import dataclass
 from typing import Any, List
 from graphgen.bases import BaseGraphStorage, BasePartitioner
@@ -9,7 +8,6 @@ NODE_UNIT: str = "n"
 EDGE_UNIT: str = "e"
-@dataclass
 class DFSPartitioner(BasePartitioner):
     """
     DFS partitioner that partitions the graph into communities of a fixed size.

 import random
 from typing import Any, List
 from graphgen.bases import BaseGraphStorage, BasePartitioner
 EDGE_UNIT: str = "e"
 class DFSPartitioner(BasePartitioner):
     """
     DFS partitioner that partitions the graph into communities of a fixed size.

graphgen/models/partitioner/ece_partitioner.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import asyncio
 import random
-from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Set, Tuple
 from tqdm.asyncio import tqdm as tqdm_async
@@ -13,7 +12,6 @@ NODE_UNIT: str = "n"
 EDGE_UNIT: str = "e"
-@dataclass
 class ECEPartitioner(BFSPartitioner):
     """
     ECE partitioner that partitions the graph into communities based on Expected Calibration Error (ECE).

 import asyncio
 import random
 from typing import Any, Dict, List, Optional, Set, Tuple
 from tqdm.asyncio import tqdm as tqdm_async
 EDGE_UNIT: str = "e"
 class ECEPartitioner(BFSPartitioner):
     """
     ECE partitioner that partitions the graph into communities based on Expected Calibration Error (ECE).

graphgen/models/partitioner/leiden_partitioner.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from collections import defaultdict
-from dataclasses import dataclass
 from typing import Any, Dict, List, Set, Tuple
 import igraph as ig
@@ -9,7 +8,6 @@ from graphgen.bases import BaseGraphStorage, BasePartitioner
 from graphgen.bases.datatypes import Community
-@dataclass
 class LeidenPartitioner(BasePartitioner):
     """
     Leiden partitioner that partitions the graph into communities using the Leiden algorithm.

 from collections import defaultdict
 from typing import Any, Dict, List, Set, Tuple
 import igraph as ig
 from graphgen.bases.datatypes import Community
 class LeidenPartitioner(BasePartitioner):
     """
     Leiden partitioner that partitions the graph into communities using the Leiden algorithm.

graphgen/models/search/db/uniprot_search.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from dataclasses import dataclass
 import requests
 from fastapi import HTTPException
@@ -8,7 +6,6 @@ from graphgen.utils import logger
 UNIPROT_BASE = "https://rest.uniprot.org/uniprotkb/search"
-@dataclass
 class UniProtSearch:
     """
     UniProt Search client to search with UniProt.

 import requests
 from fastapi import HTTPException
 UNIPROT_BASE = "https://rest.uniprot.org/uniprotkb/search"
 class UniProtSearch:
     """
     UniProt Search client to search with UniProt.

graphgen/models/search/kg/wiki_search.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import List, Union
 import wikipedia
@@ -7,7 +6,6 @@ from wikipedia import set_lang
 from graphgen.utils import detect_main_language, logger
-@dataclass
 class WikiSearch:
     @staticmethod
     def set_language(language: str):

 from typing import List, Union
 import wikipedia
 from graphgen.utils import detect_main_language, logger
 class WikiSearch:
     @staticmethod
     def set_language(language: str):

graphgen/models/search/web/bing_search.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from dataclasses import dataclass
 import requests
 from fastapi import HTTPException
@@ -9,13 +7,13 @@ BING_SEARCH_V7_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
 BING_MKT = "en-US"
-@dataclass
 class BingSearch:
     """
     Bing Search client to search with Bing.
     """
-    subscription_key: str
     def search(self, query: str, num_results: int = 1):
         """

 import requests
 from fastapi import HTTPException
 BING_MKT = "en-US"
 class BingSearch:
     """
     Bing Search client to search with Bing.
     """
+    def __init__(self, subscription_key: str):
+        self.subscription_key = subscription_key
     def search(self, query: str, num_results: int = 1):
         """

graphgen/models/search/web/google_search.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from dataclasses import dataclass
 import requests
 from fastapi import HTTPException
@@ -8,7 +6,6 @@ from graphgen.utils import logger
 GOOGLE_SEARCH_ENDPOINT = "https://customsearch.googleapis.com/customsearch/v1"
-@dataclass
 class GoogleSearch:
     def __init__(self, subscription_key: str, cx: str):
         """

 import requests
 from fastapi import HTTPException
 GOOGLE_SEARCH_ENDPOINT = "https://customsearch.googleapis.com/customsearch/v1"
 class GoogleSearch:
     def __init__(self, subscription_key: str, cx: str):
         """

graphgen/models/storage/json_storage.py CHANGED Viewed

@@ -53,6 +53,8 @@ class JsonKVStorage(BaseKVStorage):
 @dataclass
 class JsonListStorage(BaseListStorage):
     _data: list = None
     def __post_init__(self):

 @dataclass
 class JsonListStorage(BaseListStorage):
+    working_dir: str = None
+    namespace: str = None
     _data: list = None
     def __post_init__(self):

graphgen/models/tokenizer/__init__.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass, field
 from typing import List
 from graphgen.bases import BaseTokenizer
@@ -30,16 +29,13 @@ def get_tokenizer_impl(tokenizer_name: str = "cl100k_base") -> BaseTokenizer:
     )
-@dataclass
 class Tokenizer(BaseTokenizer):
     """
     Encapsulates different tokenization implementations based on the specified model name.
     """
-    model_name: str = "cl100k_base"
-    _impl: BaseTokenizer = field(init=False, repr=False)
-    def __post_init__(self):
         if not self.model_name:
             raise ValueError("TOKENIZER_MODEL must be specified in the ENV variables.")
         self._impl = get_tokenizer_impl(self.model_name)

 from typing import List
 from graphgen.bases import BaseTokenizer
     )
 class Tokenizer(BaseTokenizer):
     """
     Encapsulates different tokenization implementations based on the specified model name.
     """
+    def __init__(self, model_name: str = "cl100k_base"):
+        super().__init__(model_name)
         if not self.model_name:
             raise ValueError("TOKENIZER_MODEL must be specified in the ENV variables.")
         self._impl = get_tokenizer_impl(self.model_name)

graphgen/models/tokenizer/hf_tokenizer.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import List
 from transformers import AutoTokenizer
@@ -6,9 +5,9 @@ from transformers import AutoTokenizer
 from graphgen.bases import BaseTokenizer
-@dataclass
 class HFTokenizer(BaseTokenizer):
-    def __post_init__(self):
         self.enc = AutoTokenizer.from_pretrained(self.model_name)
     def encode(self, text: str) -> List[int]:

 from typing import List
 from transformers import AutoTokenizer
 from graphgen.bases import BaseTokenizer
 class HFTokenizer(BaseTokenizer):
+    def __init__(self, model_name: str = "cl100k_base"):
+        super().__init__(model_name)
         self.enc = AutoTokenizer.from_pretrained(self.model_name)
     def encode(self, text: str) -> List[int]:

graphgen/models/tokenizer/tiktoken_tokenizer.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from typing import List
 import tiktoken
@@ -6,9 +5,9 @@ import tiktoken
 from graphgen.bases import BaseTokenizer
-@dataclass
 class TiktokenTokenizer(BaseTokenizer):
-    def __post_init__(self):
         self.enc = tiktoken.get_encoding(self.model_name)
     def encode(self, text: str) -> List[int]:

 from typing import List
 import tiktoken
 from graphgen.bases import BaseTokenizer
 class TiktokenTokenizer(BaseTokenizer):
+    def __init__(self, model_name: str = "cl100k_base"):
+        super().__init__(model_name)
         self.enc = tiktoken.get_encoding(self.model_name)
     def encode(self, text: str) -> List[int]: