| | import { visit } from 'unist-util-visit'; |
| | import type { Node } from 'unist'; |
| | import type { Citation, CitationNode } from './types'; |
| | import { SPAN_REGEX, STANDALONE_PATTERN, CLEANUP_REGEX, COMPOSITE_REGEX } from '~/utils/citations'; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | function isStandaloneMarker(text: string, position: number): boolean { |
| | const beforeText = text.substring(0, position); |
| |
|
| | |
| | const lastUe200Literal = beforeText.lastIndexOf('\\ue200'); |
| | const lastUe200Char = beforeText.lastIndexOf('\ue200'); |
| | const lastUe200 = Math.max(lastUe200Literal, lastUe200Char); |
| |
|
| | |
| | const lastUe201Literal = beforeText.lastIndexOf('\\ue201'); |
| | const lastUe201Char = beforeText.lastIndexOf('\ue201'); |
| | const lastUe201 = Math.max(lastUe201Literal, lastUe201Char); |
| |
|
| | |
| | return lastUe200 === -1 || (lastUe201 !== -1 && lastUe201 > lastUe200); |
| | } |
| |
|
| | |
| | |
| | |
| | function findNextMatch( |
| | text: string, |
| | position: number, |
| | ): { type: string; match: RegExpExecArray | null; index: number } | null { |
| | |
| | SPAN_REGEX.lastIndex = position; |
| | COMPOSITE_REGEX.lastIndex = position; |
| | STANDALONE_PATTERN.lastIndex = position; |
| |
|
| | |
| | const spanMatch = SPAN_REGEX.exec(text); |
| | const compositeMatch = COMPOSITE_REGEX.exec(text); |
| |
|
| | |
| | let standaloneMatch: RegExpExecArray | null = null; |
| | STANDALONE_PATTERN.lastIndex = position; |
| |
|
| | |
| | let match: RegExpExecArray | null; |
| | while (!standaloneMatch && (match = STANDALONE_PATTERN.exec(text)) !== null) { |
| | if (isStandaloneMarker(text, match.index)) { |
| | standaloneMatch = match; |
| | } |
| | } |
| |
|
| | |
| | let nextMatch: RegExpExecArray | null = null; |
| | let matchType = ''; |
| | let matchIndex = -1; |
| | let typeIndex = -1; |
| |
|
| | if (spanMatch && (!nextMatch || spanMatch.index < matchIndex || matchIndex === -1)) { |
| | nextMatch = spanMatch; |
| | matchType = 'span'; |
| | matchIndex = spanMatch.index; |
| | |
| | typeIndex = 0; |
| | } |
| |
|
| | if (compositeMatch && (!nextMatch || compositeMatch.index < matchIndex || matchIndex === -1)) { |
| | nextMatch = compositeMatch; |
| | matchType = 'composite'; |
| | matchIndex = compositeMatch.index; |
| | typeIndex = 0; |
| | } |
| |
|
| | if (standaloneMatch && (!nextMatch || standaloneMatch.index < matchIndex || matchIndex === -1)) { |
| | nextMatch = standaloneMatch; |
| | matchType = 'standalone'; |
| | matchIndex = standaloneMatch.index; |
| | typeIndex = 0; |
| | } |
| |
|
| | if (!nextMatch) return null; |
| |
|
| | return { type: matchType, match: nextMatch, index: typeIndex }; |
| | } |
| |
|
| | function processTree(tree: Node) { |
| | visit(tree, 'text', (node, index, parent) => { |
| | const textNode = node as CitationNode; |
| | const parentNode = parent as CitationNode; |
| |
|
| | if (typeof textNode.value !== 'string') return; |
| |
|
| | const originalValue = textNode.value; |
| | const segments: Array<CitationNode> = []; |
| |
|
| | |
| | let currentPosition = 0; |
| |
|
| | |
| | |
| | const citationIds = new Map<number, string>(); |
| | const typeCounts = { span: 0, composite: 0, standalone: 0 }; |
| |
|
| | while (currentPosition < originalValue.length) { |
| | const nextMatchInfo = findNextMatch(originalValue, currentPosition); |
| |
|
| | if (!nextMatchInfo) { |
| | |
| | const remainingText = originalValue.substring(currentPosition).replace(CLEANUP_REGEX, ''); |
| | if (remainingText) { |
| | segments.push({ type: 'text', value: remainingText }); |
| | } |
| | break; |
| | } |
| |
|
| | const { type, match } = nextMatchInfo; |
| | const matchIndex = match!.index; |
| | const matchText = match![0]; |
| |
|
| | |
| | if (matchIndex > currentPosition) { |
| | const textBeforeMatch = originalValue |
| | .substring(currentPosition, matchIndex) |
| | .replace(CLEANUP_REGEX, ''); |
| |
|
| | if (textBeforeMatch) { |
| | segments.push({ type: 'text', value: textBeforeMatch }); |
| | } |
| | } |
| |
|
| | |
| | const citationId = `${type}-${typeCounts[type as keyof typeof typeCounts]}-${matchIndex}`; |
| | citationIds.set(matchIndex, citationId); |
| |
|
| | |
| | switch (type) { |
| | case 'span': { |
| | const spanText = matchText; |
| | const cleanText = spanText.replace(/\\ue203|\\ue204/g, ''); |
| |
|
| | |
| | let associatedCitationId: string | null = null; |
| | const endOfSpan = matchIndex + matchText.length; |
| |
|
| | |
| | const nextCitation = findNextMatch(originalValue, endOfSpan); |
| | if ( |
| | nextCitation && |
| | (nextCitation.type === 'standalone' || nextCitation.type === 'composite') && |
| | nextCitation.match!.index - endOfSpan < 5 |
| | ) { |
| | |
| | const nextIndex = nextCitation.match!.index; |
| | const nextType = nextCitation.type; |
| | associatedCitationId = `${nextType}-${typeCounts[nextType as keyof typeof typeCounts]}-${nextIndex}`; |
| | } |
| |
|
| | segments.push({ |
| | type: 'highlighted-text', |
| | data: { |
| | hName: 'highlighted-text', |
| | hProperties: { citationId: associatedCitationId }, |
| | }, |
| | children: [{ type: 'text', value: cleanText }], |
| | }); |
| |
|
| | typeCounts.span++; |
| | break; |
| | } |
| |
|
| | case 'composite': { |
| | const compositeText = matchText; |
| |
|
| | |
| | const compositeRefRegex = new RegExp(STANDALONE_PATTERN.source, 'g'); |
| | let refMatch: RegExpExecArray | null; |
| | const citations: Array<Citation> = []; |
| |
|
| | while ((refMatch = compositeRefRegex.exec(compositeText)) !== null) { |
| | const turn = Number(refMatch[1]); |
| | const refType = refMatch[2]; |
| | const refIndex = Number(refMatch[3]); |
| |
|
| | citations.push({ |
| | turn, |
| | refType, |
| | index: refIndex, |
| | }); |
| | } |
| |
|
| | if (citations.length > 0) { |
| | segments.push({ |
| | type: 'composite-citation', |
| | data: { |
| | hName: 'composite-citation', |
| | hProperties: { |
| | citations, |
| | citationId: citationId, |
| | }, |
| | }, |
| | }); |
| | } |
| |
|
| | typeCounts.composite++; |
| | break; |
| | } |
| |
|
| | case 'standalone': { |
| | |
| | const turn = Number(match![1]); |
| | const refType = match![2]; |
| | const refIndex = Number(match![3]); |
| |
|
| | segments.push({ |
| | type: 'citation', |
| | data: { |
| | hName: 'citation', |
| | hProperties: { |
| | citation: { |
| | turn, |
| | refType, |
| | index: refIndex, |
| | }, |
| | citationType: 'standalone', |
| | citationId: citationId, |
| | }, |
| | }, |
| | }); |
| |
|
| | typeCounts.standalone++; |
| | break; |
| | } |
| | } |
| |
|
| | |
| | currentPosition = matchIndex + matchText.length; |
| | } |
| |
|
| | |
| | if (segments.length > 0 && index !== undefined) { |
| | parentNode.children?.splice(index, 1, ...segments); |
| | return index + segments.length; |
| | } else if (textNode.value !== textNode.value.replace(CLEANUP_REGEX, '')) { |
| | |
| | textNode.value = textNode.value.replace(CLEANUP_REGEX, ''); |
| | } |
| | }); |
| | } |
| |
|
| | export function unicodeCitation() { |
| | return (tree: Node) => { |
| | processTree(tree); |
| | }; |
| | } |
| |
|