Spaces:
Sleeping
Sleeping
| import re | |
| from typing import Any, List | |
| from graphgen.bases.base_splitter import BaseSplitter | |
class CharacterSplitter(BaseSplitter):
    """Split text on a single separator and merge the pieces into chunks.

    The separator is matched literally unless ``is_separator_regex`` is true,
    in which case it is used verbatim as a regular-expression pattern.
    """

    # Openers of zero-width lookaround assertions. A pattern that starts with
    # one of these consumes no characters when it matches, so there is no
    # separator text to put back between the pieces afterwards.
    _LOOKAROUND_PREFIXES = ("(?=", "(?!", "(?<=", "(?<!")

    def __init__(
        self, separator: str = "\n\n", is_separator_regex: bool = False, **kwargs: Any
    ) -> None:
        """Create a new CharacterSplitter.

        Args:
            separator: String (or regex pattern) on which the text is split.
            is_separator_regex: When true, ``separator`` is used as a regex
                pattern as-is; otherwise it is escaped and matched literally.
            **kwargs: Forwarded unchanged to ``BaseSplitter.__init__``.
        """
        super().__init__(**kwargs)
        self._separator = separator
        self._is_separator_regex = is_separator_regex

    def split_text(self, text: str) -> List[str]:
        """Split ``text`` on the configured separator and return merged chunks.

        Args:
            text: The input text to split.

        Returns:
            The list produced by ``_merge_splits`` for the split pieces.
        """
        # First naively split the large input into a bunch of smaller pieces.
        # A literal separator is escaped so regex metacharacters in it are
        # matched as plain text.
        pattern = (
            self._separator if self._is_separator_regex else re.escape(self._separator)
        )
        pieces = self._split_text_with_regex(text, pattern, self.keep_separator)

        # Decide which separator to hand to the merge step.
        #  - keep_separator: the pieces already carry the separator, so none
        #    is passed again.
        #  - zero-width lookaround regex: the match consumed no characters, so
        #    passing the raw pattern text (e.g. "(?<=\\.)") would inject regex
        #    syntax into the output chunks.
        # NOTE(review): other regex separators are still passed through as the
        # raw pattern string, as before — confirm whether that is intended.
        is_lookaround = self._is_separator_regex and self._separator.startswith(
            self._LOOKAROUND_PREFIXES
        )
        if self.keep_separator or is_lookaround:
            merge_separator = ""
        else:
            merge_separator = self._separator
        return self._merge_splits(pieces, merge_separator)