···26});
// Earlier form of the hashtag pattern. It matches the start of the text or a
// whitespace character (group 1), then one character from the class `[##]`,
// then capture group 2: one or more characters that are neither whitespace nor
// U+00AD, U+2060, or U+200A through U+200D.
// Group 2 is mandatory in this form, so a marker with nothing valid after it
// does not match at all.
// `unwrap()` panics when the static is first initialised if the pattern fails
// to compile.
// NOTE(review): the leading digits and `-` markers on these lines look like
// diff gutter residue for the removed side of a change; another TAG_REGEX
// definition appears later in this listing - confirm which one is live.
2728static TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29- // Simplified pattern - full Unicode handling would need more work
30- Regex::new(r"(^|\s)[##]([^\s\x{00AD}\x{2060}\x{200A}\x{200B}\x{200C}\x{200D}]+)").unwrap()
00000000000031});
3233static MARKDOWN_LINK_REGEX: LazyLock<Regex> =
···552 let mut facets = Vec::new();
553554 for cap in TAG_REGEX.captures_iter(text) {
555- let tag_match = cap.get(2).unwrap();
0000556 let tag_str = tag_match.as_str();
00000557558 // Calculate trimmed length after stripping trailing punctuation
559 let trimmed_len = if let Some(trimmed) = TRAILING_PUNCT_REGEX.find(tag_str) {
···26});
2728static TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29+ // Pattern: (^|\s)[##](prefix* core+ suffix*)?
30+ //
31+ // - prefix: [^\s\u{00AD}...]* - any chars except spaces/zero-width (optional)
32+ // - core: [^\d\s\p{P}\u{00AD}...]+ - at least one char that's not digit/space/punct/zero-width (required)
33+ // - suffix: [^\s\u{00AD}...]* - any chars except spaces/zero-width (optional)
34+ //
35+ // Zero-width chars excluded: \u{00AD} (soft hyphen), \u{2060} (word joiner),
36+ // \u{200A}-\u{200D} (hair space, zero-width space/joiner/non-joiner), \u{20e2} (combining mark)
37+ //
38+ // Note: emoji modifier (\ufe0f) is filtered in detect_tags() since Rust regex
39+ // doesn't support negative lookahead
40+ Regex::new(
41+ r"(^|\s)[##]([^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}\u{20e2}]*[^\d\s\p{P}\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}\u{20e2}]+[^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}\u{20e2}]*)?"
42+ ).unwrap()
43});
4445static MARKDOWN_LINK_REGEX: LazyLock<Regex> =
···564 let mut facets = Vec::new();
565566 for cap in TAG_REGEX.captures_iter(text) {
567+ // capture group 2 is optional, skip if empty (just # with nothing after)
568+ let tag_match = match cap.get(2) {
569+ Some(m) => m,
570+ None => continue,
571+ };
572 let tag_str = tag_match.as_str();
573+574+ // Filter out tags starting with U+FE0F (variation selector-16), since the regex crate can't do negative lookahead
575+ if tag_str.starts_with('\u{fe0f}') {
576+ continue;
577+ }
578579 // Calculate trimmed length after stripping trailing punctuation
580 let trimmed_len = if let Some(trimmed) = TRAILING_PUNCT_REGEX.find(tag_str) {