/**
 * Citation Regex Patterns
 *
 * These patterns handle two formats that LLMs may output:
 * 1. Literal escape sequences: "\ue202turn0search0" (backslash + "ue202" = 6 chars)
 * 2. Actual Unicode characters: "<U+E202>turn0search0" (U+E202 = 1 char, private use area)
 *
 * The system instructs LLMs to output literal escape sequences, but some models
 * may convert them to actual Unicode characters during text generation. These
 * dual-format patterns ensure robust citation handling regardless of output format.
 *
 * Citation Format:
 * - \ue202 / U+E202: Standalone citation marker (before each anchor)
 * - \ue200 / U+E200: Composite group start
 * - \ue201 / U+E201: Composite group end
 * - \ue203 / U+E203: Highlight span start
 * - \ue204 / U+E204: Highlight span end
 *
 * Anchor Pattern: turn{N}{type}{index}
 * - N: Turn number (0-based)
 * - type: search|image|news|video|ref|file
 * - index: Result index within that type (0-based)
 *
 * Examples:
 * - Standalone: "Statement.\ue202turn0search0"
 * - Composite: "\ue200\ue202turn0search0\ue202turn0news1\ue201"
 * - Highlighted: "\ue203Cited text.\ue204\ue202turn0search0"
 */
/**
 * Matches highlighted text spans in both literal and Unicode formats.
 *
 * A span opens with the highlight-start marker (either the six-character
 * text `\ue203` or the single character U+E203) and closes at the nearest
 * highlight-end marker (`\ue204` / U+E204). Capture group 1 is the whole
 * span, markers included.
 *
 * The span body is matched lazily with `[\s\S]*?` rather than `.*?`, because
 * `.` stops at line terminators and cited text may contain line breaks.
 *
 * The regex is global (`g`): `exec()` and `test()` advance `lastIndex` on
 * this shared instance, so reset it before reuse or prefer `matchAll` /
 * `replace`, which do not depend on leftover state.
 */
export const SPAN_REGEX = /((?:\\ue203|\ue203)[\s\S]*?(?:\\ue204|\ue204))/g;
/**
 * Matches composite citation blocks (several citations grouped together).
 *
 * A block runs from the group-start marker (the six-character text `\ue200`
 * or the single character U+E200) to the nearest group-end marker
 * (`\ue201` / U+E201). Capture group 1 is the whole block, markers included.
 *
 * The body uses `.*?`, so a block is only recognised when both markers sit
 * on the same line; `.` does not match line terminators.
 *
 * The regex is global (`g`): `exec()` and `test()` advance `lastIndex` on
 * this shared instance, so reset it before reuse or prefer `matchAll` /
 * `replace`.
 */
export const COMPOSITE_REGEX = /((?:\\ue200|\ue200).*?(?:\\ue201|\ue201))/g;
/**
 * Matches standalone citation anchors of the form
 * `<marker>turn{N}{type}{index}`.
 *
 * The marker is either the six-character text `\ue202` or the single
 * character U+E202. Capture groups:
 *   1. turn number (digits)
 *   2. type: one of search, image, news, video, ref, file
 *   3. result index (digits)
 *
 * The regex is global (`g`): `exec()` and `test()` advance `lastIndex` on
 * this shared instance, so reset it before reuse or prefer `matchAll` /
 * `replace`.
 */
export const STANDALONE_PATTERN =
  /(?:\\ue202|\ue202)turn(\d+)(search|image|news|video|ref|file)(\d+)/g;
/**
 * Matches every citation marker so it can be stripped from text for clean
 * display, e.g. `text.replace(CLEANUP_REGEX, '')`.
 *
 * Covers markers ue200 to ue204 and ue206, each in both forms: the
 * six-character literal text (backslash + "ue20X") and the single
 * private-use character. Only the markers are matched; anchor text such as
 * `turn0search0` is left in place.
 *
 * NOTE(review): `\ue206` is stripped here, but its meaning is not described
 * in this file's visible header. Confirm its role with the producer of these
 * markers.
 */
export const CLEANUP_REGEX =
  /\\ue200|\\ue201|\\ue202|\\ue203|\\ue204|\\ue206|\ue200|\ue201|\ue202|\ue203|\ue204|\ue206/g;
/**
 * Matches invalid/orphaned citation anchors, together with any whitespace
 * immediately before them, so they can be removed from text.
 *
 * An anchor is the standalone marker (the six-character text `\ue202` or the
 * single character U+E202) followed by `turn{N}{type}{index}`. The leading
 * `\s*` is part of the match, so replacing with `''` also drops the gap that
 * preceded the citation.
 *
 * Capture group 1 holds the type. It is kept capturing so existing callers
 * that read match groups keep working. The alternation lists the types in
 * the same order as STANDALONE_PATTERN; no type is a prefix of another, so
 * the order does not affect what is matched.
 *
 * The regex is global (`g`): `exec()` and `test()` advance `lastIndex` on
 * this shared instance, so reset it before reuse or prefer `matchAll` /
 * `replace`.
 */
export const INVALID_CITATION_REGEX =
  /\s*(?:\\ue202|\ue202)turn\d+(search|image|news|video|ref|file)\d+/g;