/**
 * parser.ts — Note Parser (Pure)
 *
 * Parse markdown content into an array of Place objects.
 * Everything in this module is a pure function with no side effects.
 */

/** A place parsed from one top-level bullet and the sub-bullets that follow it. */
export interface Place {
  /** Display name: link text, wiki-link display/target, or the plain bullet text. */
  name: string;
  /** Link target when the bullet is a markdown link with a non-empty URL. */
  url?: string;
  /** Structured `key: value` sub-bullets; keys are lower-cased, later duplicates win. */
  fields: Record<string, string>;
  /** Free-text sub-bullets that are not structured fields, in source order. */
  notes: string[];
  /** Latitude, present only when the `geo` field holds a valid in-range pair. */
  lat?: number;
  /** Longitude, present only when the `geo` field holds a valid in-range pair. */
  lng?: number;
  /** Zero-based index of the top-level bullet line (after newline normalization). */
  startLine: number;
  /** Zero-based index of the last sub-bullet line attached to this place. */
  endLine: number;
}

/** A place under construction: everything except the derived coordinates. */
type PlaceDraft = Omit<Place, "lat" | "lng">;

/** Regex for top-level bullet: `* ` or `- ` at column 0 */
const TOP_BULLET_RE = /^[*-] /;

/**
 * Regex for sub-bullet: either two or more leading tab/space characters, or a
 * single leading tab, followed by `* ` or `- `. Uses a flat character class
 * instead of nested quantifiers to avoid catastrophic backtracking (ReDoS).
 */
const SUB_BULLET_RE = /^[\t ]{2,}[*-] |^\t[*-] /;

/** Regex for structured field: single word key, colon, space, then value */
const FIELD_RE = /^(\w+): (.*)$/;

/** Regex for markdown link: [text](url) or [text](url "title") */
const MD_LINK_RE = /^\[([^\]]*)\]\(([^)"]*?)(?:\s+"[^"]*")?\)$/;

/** Regex for wiki-link: [[Page]] or [[Target|Display]] */
const WIKI_LINK_RE = /^\[\[([^\]]*)\]\]$/;

/**
 * Regex for valid geo coordinates.
 * Requires digits (not just a dot), optional decimal part with digits after dot.
 * Format: lat,lng with optional whitespace after the comma.
 */
const GEO_RE = /^(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)$/;

/**
 * Parse the name portion of a top-level bullet, handling markdown links,
 * wiki-links, and plain text.
 *
 * @param raw - Bullet text with the bullet prefix already removed and trimmed.
 * @returns The display name, plus the URL for markdown links with a non-empty target.
 */
function parseName(raw: string): { name: string; url?: string } {
  // Markdown link: the link text is the name, the target is the URL.
  const mdMatch = raw.match(MD_LINK_RE);
  if (mdMatch) {
    // Trim both parts so `[ Foo ](bar )` yields the same result as `[Foo](bar)`,
    // matching how plain-text names are already trimmed by the caller.
    const href = mdMatch[2].trim();
    return { name: mdMatch[1].trim(), url: href || undefined };
  }

  // Wiki-link: prefer the display part after the first `|`, else the target.
  const wikiMatch = raw.match(WIKI_LINK_RE);
  if (wikiMatch) {
    const inner = wikiMatch[1];
    const pipeIdx = inner.indexOf("|");
    const shown = pipeIdx !== -1 ? inner.substring(pipeIdx + 1) : inner;
    return { name: shown.trim() };
  }

  // Plain text
  return { name: raw };
}

/**
 * Parse a geo field value into lat/lng.
 *
 * @param value - Field value expected in `lat,lng` form.
 * @returns Both coordinates, or an empty object when the value is malformed or
 *   out of range (latitude outside [-90, 90], longitude outside [-180, 180]).
 */
function parseGeo(value: string): { lat?: number; lng?: number } {
  const match = value.match(GEO_RE);
  if (!match) return {};

  const lat = parseFloat(match[1]);
  const lng = parseFloat(match[2]);
  if (lat < -90 || lat > 90 || lng < -180 || lng > 180) return {};

  return { lat, lng };
}

/**
 * Extract the text content from a sub-bullet line, stripping indentation
 * and bullet prefix.
 */
function extractSubBulletText(line: string): string {
  return line.replace(SUB_BULLET_RE, "").trim();
}

/**
 * Parse markdown content into places.
 *
 * Each top-level bullet starts a new place. Sub-bullets that follow attach to
 * the most recent top-level bullet as either a structured field (`key: value`)
 * or a free-text note. Every other line is ignored and does not end the
 * current place.
 *
 * @param content - Markdown text; `\r\n` and bare `\r` line endings are accepted.
 * @returns Places in source order. Bullets whose name is empty are omitted.
 */
export function parsePlaces(content: string): Place[] {
  if (!content) return [];

  // Normalize Windows (\r\n) and bare \r line endings so indices are per line.
  const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  const lines = normalized.split("\n");

  const places: Place[] = [];
  let current: PlaceDraft | null = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (TOP_BULLET_RE.test(line)) {
      // A new top-level bullet closes the previous place.
      if (current) {
        finalizePlace(current, places);
      }

      const raw = line.replace(TOP_BULLET_RE, "").trim();
      const { name, url } = parseName(raw);
      current = {
        name,
        url,
        // Null-prototype object: user-supplied keys such as `constructor`
        // cannot collide with inherited Object.prototype members.
        fields: Object.create(null) as Record<string, string>,
        notes: [],
        startLine: i,
        endLine: i,
      };
    } else if (SUB_BULLET_RE.test(line) && current) {
      // Sub-bullet belongs to the current place and extends its line range.
      current.endLine = i;
      const text = extractSubBulletText(line);

      const fieldMatch = text.match(FIELD_RE);
      if (fieldMatch) {
        const key = fieldMatch[1].toLowerCase();
        current.fields[key] = fieldMatch[2].trim();
      } else if (text) {
        current.notes.push(text);
      }
    }
    // Non-bullet lines are ignored (dead zones)
  }

  // Finalize last place
  if (current) {
    finalizePlace(current, places);
  }

  return places;
}

/**
 * Finalize a place block: parse geo, exclude empty names, push to results.
 *
 * @param block - The accumulated place draft.
 * @param places - Result array; the finished place is appended to it.
 */
function finalizePlace(block: PlaceDraft, places: Place[]): void {
  // Exclude empty/whitespace-only names
  if (!block.name.trim()) return;

  const place: Place = {
    name: block.name,
    url: block.url,
    fields: block.fields,
    notes: block.notes,
    startLine: block.startLine,
    endLine: block.endLine,
  };

  // Attach coordinates only when the geo field parses to a valid pair.
  if (block.fields.geo) {
    const { lat, lng } = parseGeo(block.fields.geo);
    if (lat !== undefined && lng !== undefined) {
      place.lat = lat;
      place.lng = lng;
    }
  }

  places.push(place);
}