GraphGen / graphgen /models /splitter /markdown_splitter.py
github-actions[bot]
Auto-sync from demo at Wed Sep 24 09:52:41 UTC 2025
43d27f2
from typing import Any
from graphgen.models.splitter.recursive_character_splitter import (
RecursiveCharacterSplitter,
)
class MarkdownTextRefSplitter(RecursiveCharacterSplitter):
    """Recursive character splitter preconfigured with Markdown separators.

    The separator list is ordered from the coarsest Markdown boundary
    (headings) down to the finest fallback (the empty string) and is handed
    unchanged to ``RecursiveCharacterSplitter``.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Build the Markdown separator list and forward it to the base class.

        Args:
            **kwargs: Forwarded unchanged to
                ``RecursiveCharacterSplitter.__init__``.
        """
        # NOTE(review): the heading and horizontal-rule entries below are
        # written as regular expressions. This constructor does not itself
        # switch on any regex mode, so they only behave as patterns if the
        # base class (or the caller, via kwargs) treats separators as
        # regexes -- confirm against RecursiveCharacterSplitter.

        # ATX headings: a newline followed by one to six '#' and a space.
        # Setext headings (a title underlined with '---' or '===') are not
        # covered.
        heading_breaks = ["\n#{1,6} "]

        # A code fence followed by a newline (intended as end of code block).
        code_fence_breaks = ["```\n"]

        # Horizontal rules made of three or more '*', '-' or '_' on a line of
        # their own. Variants containing spaces (e.g. '* * *') are not
        # covered.
        rule_breaks = [
            "\n\\*\\*\\*+\n",
            "\n---+\n",
            "\n___+\n",
        ]

        # Generic fallbacks: paragraph break, line break, space, and finally
        # the empty string.
        whitespace_fallbacks = ["\n\n", "\n", " ", ""]

        super().__init__(
            separators=[
                *heading_breaks,
                *code_fence_breaks,
                *rule_breaks,
                *whitespace_fallbacks,
            ],
            **kwargs,
        )