tangled
alpha
login
or
join now
blooym.dev
/
jacquard
forked from
nonbinary.computer/jacquard
0
fork
atom
A better Rust ATProto crate
0
fork
atom
overview
issues
pulls
pipelines
fixed tag regex
Orual
4 months ago
c251e98b
c2bb8963
+24
-3
1 changed file
expand all
collapse all
unified
split
crates
jacquard
src
richtext.rs
+24
-3
crates/jacquard/src/richtext.rs
···
26
26
});
27
27
28
28
static TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29
29
-
// Simplified version - full unicode handling would need more work
30
30
-
Regex::new(r"(^|\s)[##]([^\s\x{00AD}\x{2060}\x{200A}\x{200B}\x{200C}\x{200D}]+)").unwrap()
29
29
+
// Pattern: (^|\s)[##](prefix* core+ suffix*)?
30
30
+
//
31
31
+
// - prefix: [^\s\u{00AD}...]* - any chars except spaces/zero-width (optional)
32
32
+
// - core: [^\d\s\p{P}\u{00AD}...]+ - at least one char that's not digit/space/punct/zero-width (required)
33
33
+
// - suffix: [^\s\u{00AD}...]* - any chars except spaces/zero-width (optional)
34
34
+
//
35
35
+
// Zero-width chars excluded: \u{00AD} (soft hyphen), \u{2060} (word joiner),
36
36
+
// \u{200A}-\u{200D} (hair space, zero-width space/non-joiner/joiner), \u{20e2} (combining enclosing screen)
37
37
+
//
38
38
+
// Note: a leading emoji variation selector (\u{fe0f}) is filtered in detect_tags() since the Rust regex crate
39
39
+
// doesn't support negative lookahead
40
40
+
Regex::new(
41
41
+
r"(^|\s)[##]([^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}\u{20e2}]*[^\d\s\p{P}\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}\u{20e2}]+[^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}\u{20e2}]*)?"
42
42
+
).unwrap()
31
43
});
32
44
33
45
static MARKDOWN_LINK_REGEX: LazyLock<Regex> =
···
552
564
let mut facets = Vec::new();
553
565
554
566
for cap in TAG_REGEX.captures_iter(text) {
555
555
-
let tag_match = cap.get(2).unwrap();
567
567
+
// Capture group 2 is optional; skip matches where it did not participate (a bare #, or # followed only by digits/punctuation)
568
568
+
let tag_match = match cap.get(2) {
569
569
+
Some(m) => m,
570
570
+
None => continue,
571
571
+
};
556
572
let tag_str = tag_match.as_str();
573
573
+
574
574
+
// Skip tags that start with the emoji variation selector U+FE0F (the regex crate has no negative lookahead to reject them in the pattern)
575
575
+
if tag_str.starts_with('\u{fe0f}') {
576
576
+
continue;
577
577
+
}
557
578
558
579
// Calculate trimmed length after stripping trailing punctuation
559
580
let trimmed_len = if let Some(trimmed) = TRAILING_PUNCT_REGEX.find(tag_str) {