GraphGen / graphgen /models /splitter /character_splitter.py
github-actions[bot]
Auto-sync from demo at Wed Sep 24 09:52:41 UTC 2025
43d27f2
import re
from typing import Any, List
from graphgen.bases.base_splitter import BaseSplitter
class CharacterSplitter(BaseSplitter):
    """Split text on a single separator and merge the pieces into chunks.

    The separator is matched literally unless ``is_separator_regex`` is
    true, in which case it is used as a regular-expression pattern.
    """

    def __init__(
        self, separator: str = "\n\n", is_separator_regex: bool = False, **kwargs: Any
    ) -> None:
        """Create a new CharacterSplitter.

        Args:
            separator: String (or regex pattern) on which the text is split.
            is_separator_regex: When true, ``separator`` is used as a regex
                pattern as-is; otherwise it is escaped and matched literally.
            **kwargs: Forwarded unchanged to ``BaseSplitter.__init__``.
        """
        super().__init__(**kwargs)
        self._separator = separator
        self._is_separator_regex = is_separator_regex

    def split_text(self, text: str) -> List[str]:
        """Split ``text`` on the configured separator and return merged chunks.

        Args:
            text: The text to split.

        Returns:
            The chunks produced by ``self._merge_splits``.
        """
        # Escape the separator unless the caller asked for regex semantics.
        pattern = (
            self._separator if self._is_separator_regex else re.escape(self._separator)
        )

        # Naive first pass: break the input at every separator match.
        # NOTE(review): `_split_text_with_regex`, `_merge_splits` and
        # `keep_separator` are presumably provided by BaseSplitter.
        pieces = self._split_text_with_regex(text, pattern, self.keep_separator)

        # A lookaround assertion matches a position, not characters, so the
        # split removed nothing from the text. Re-inserting the pattern while
        # merging would inject regex source into the output chunks.
        is_lookaround = self._is_separator_regex and self._separator.startswith(
            ("(?=", "(?!", "(?<=", "(?<!")
        )

        # When separators were kept (or never consumed) the pieces already
        # contain everything; otherwise join with the separator itself.
        if self.keep_separator or is_lookaround:
            merge_separator = ""
        else:
            merge_separator = self._separator

        return self._merge_splits(pieces, merge_separator)