The smokesignal.events web application

feature: event facets

+1016 -10
+1
Cargo.toml
··· 58 58 reqwest-chain = "1" 59 59 reqwest-middleware = { version = "0.4", features = ["http2", "json", "multipart"] } 60 60 reqwest-retry = "0.7" 61 + regex = "1" 61 62 duration-str = "0.11" 62 63 minijinja = { version = "2.7", features = ["builtins", "json", "urlencode"] } 63 64 minijinja-autoreload = { version = "2.7", optional = true }
+22
src/config.rs
··· 65 65 pub enable_opensearch: bool, 66 66 pub enable_task_opensearch: bool, 67 67 pub opensearch_endpoint: Option<String>, 68 + pub facets_mentions_max: usize, 69 + pub facets_tags_max: usize, 70 + pub facets_links_max: usize, 71 + pub facets_max: usize, 68 72 } 69 73 70 74 impl Config { ··· 156 160 } 157 161 }; 158 162 163 + // Parse facet limit configuration 164 + let facets_mentions_max = default_env("FACETS_MENTIONS_MAX", "5") 165 + .parse::<usize>() 166 + .unwrap_or(5); 167 + let facets_tags_max = default_env("FACETS_TAGS_MAX", "5") 168 + .parse::<usize>() 169 + .unwrap_or(5); 170 + let facets_links_max = default_env("FACETS_LINKS_MAX", "5") 171 + .parse::<usize>() 172 + .unwrap_or(5); 173 + let facets_max = default_env("FACETS_MAX", "10") 174 + .parse::<usize>() 175 + .unwrap_or(10); 176 + 159 177 Ok(Self { 160 178 version: version()?, 161 179 http_port, ··· 181 199 enable_opensearch, 182 200 enable_task_opensearch, 183 201 opensearch_endpoint, 202 + facets_mentions_max, 203 + facets_tags_max, 204 + facets_links_max, 205 + facets_max, 184 206 }) 185 207 } 186 208
+896
src/facets.rs
··· 1 + //! Rich text facet structures and rendering for AT Protocol. 2 + //! 3 + //! This module provides structures for handling rich text facets (mentions, links, hashtags), 4 + //! parsing them from text, and rendering them as HTML for display in the UI. 5 + //! 6 + //! # Byte Offset Calculation 7 + //! 8 + //! This implementation correctly uses UTF-8 byte offsets as required by AT Protocol. 9 + //! The facets use "inclusive start and exclusive end" byte ranges. All parsing is done 10 + //! using `regex::bytes::Regex` which operates on byte slices and returns byte positions, 11 + //! ensuring correct handling of multi-byte UTF-8 characters (emojis, CJK, accented chars). 12 + 13 + use atproto_identity::resolve::IdentityResolver; 14 + use atproto_record::lexicon::app::bsky::richtext::facet::{Facet, FacetFeature, ByteSlice, Mention, Link, Tag}; 15 + use regex::bytes::Regex; 16 + use std::fmt::Write; 17 + 18 + /// Configuration for facet parsing and rendering limits 19 + #[derive(Debug, Clone, Copy)] 20 + pub struct FacetLimits { 21 + /// Maximum number of mention facets to process (default: 5) 22 + pub mentions_max: usize, 23 + /// Maximum number of tag facets to process (default: 5) 24 + pub tags_max: usize, 25 + /// Maximum number of link facets to process (default: 5) 26 + pub links_max: usize, 27 + /// Maximum total number of facets to process (default: 10) 28 + pub max: usize, 29 + } 30 + 31 + impl Default for FacetLimits { 32 + fn default() -> Self { 33 + Self { 34 + mentions_max: 5, 35 + tags_max: 5, 36 + links_max: 5, 37 + max: 10, 38 + } 39 + } 40 + } 41 + 42 + /// Mention span with byte positions and handle 43 + #[derive(Debug)] 44 + pub struct MentionSpan { 45 + pub start: usize, 46 + pub end: usize, 47 + pub handle: String, 48 + } 49 + 50 + /// URL span with byte positions and URL 51 + #[derive(Debug)] 52 + pub struct UrlSpan { 53 + pub start: usize, 54 + pub end: usize, 55 + pub url: String, 56 + } 57 + 58 + /// Tag span with byte positions and tag text 59 + #[derive(Debug)] 60 + pub struct TagSpan { 61 + pub start: usize, 62 + pub end: usize, 63 + pub tag: String, 64 + } 65 + 66 + /// Parse mentions from text and return their byte positions 67 + /// This function excludes mentions that appear within URLs 68 + pub fn parse_mentions(text: &str) -> Vec<MentionSpan> { 69 + let mut spans = Vec::new(); 70 + 71 + // First, parse all URLs to exclude mention matches within them 72 + let url_spans = parse_urls(text); 73 + 74 + // Regex based on: https://atproto.com/specs/handle#handle-identifier-syntax 75 + // Pattern: [$|\W](@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?) 76 + let mention_regex = Regex::new( 77 + r"(?:^|[^\w])(@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)" 78 + ).unwrap(); 79 + 80 + let text_bytes = text.as_bytes(); 81 + for capture in mention_regex.captures_iter(text_bytes) { 82 + if let Some(mention_match) = capture.get(1) { 83 + let start = mention_match.start(); 84 + let end = mention_match.end(); 85 + 86 + // Check if this mention overlaps with any URL 87 + let overlaps_url = url_spans.iter().any(|url| { 88 + // Check if mention is within or overlaps the URL span 89 + (start >= url.start && start < url.end) || (end > url.start && end <= url.end) 90 + }); 91 + 92 + // Only add the mention if it doesn't overlap with a URL 93 + if !overlaps_url { 94 + let handle = std::str::from_utf8(&mention_match.as_bytes()[1..]) 95 + .unwrap_or_default() 96 + .to_string(); 97 + 98 + spans.push(MentionSpan { start, end, handle }); 99 + } 100 + } 101 + } 102 + 103 + spans 104 + } 105 + 106 + /// Parse URLs from text and return their byte positions 107 + pub fn parse_urls(text: &str) -> Vec<UrlSpan> { 108 + let mut spans = Vec::new(); 109 + 110 + // Partial/naive URL regex based on: https://stackoverflow.com/a/3809435 111 + // Pattern: [$|\W](https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?) 112 + let url_regex = Regex::new( 113 + r"(?:^|[^\w])(https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?)" 114 + ).unwrap(); 115 + 116 + let text_bytes = text.as_bytes(); 117 + for capture in url_regex.captures_iter(text_bytes) { 118 + if let Some(url_match) = capture.get(1) { 119 + let url = std::str::from_utf8(url_match.as_bytes()) 120 + .unwrap_or_default() 121 + .to_string(); 122 + 123 + spans.push(UrlSpan { 124 + start: url_match.start(), 125 + end: url_match.end(), 126 + url, 127 + }); 128 + } 129 + } 130 + 131 + spans 132 + } 133 + 134 + /// Parse hashtags from text and return their byte positions 135 + pub fn parse_tags(text: &str) -> Vec<TagSpan> { 136 + let mut spans = Vec::new(); 137 + 138 + // Regex based on: https://github.com/bluesky-social/atproto/blob/d91988fe79030b61b556dd6f16a46f0c3b9d0b44/packages/api/src/rich-text/util.ts 139 + // Simplified for Rust - matches hashtags at word boundaries 140 + // Pattern matches: start of string or non-word char, then # or #, then tag content 141 + let tag_regex = Regex::new(r"(?:^|[^\w])([##])([\w]+(?:[\w]*)*)").unwrap(); 142 + 143 + let text_bytes = text.as_bytes(); 144 + 145 + // Work with bytes for proper position tracking 146 + for capture in tag_regex.captures_iter(text_bytes) { 147 + if let (Some(full_match), Some(hash_match), Some(tag_match)) = 148 + (capture.get(0), capture.get(1), capture.get(2)) 149 + { 150 + // Calculate the absolute byte position of the hash symbol 151 + // The full match includes the preceding character (if any) 152 + // so we need to adjust for that 153 + let match_start = full_match.start(); 154 + let hash_offset = hash_match.start() - full_match.start(); 155 + let start = match_start + hash_offset; 156 + let end = match_start + hash_offset + hash_match.len() + tag_match.len(); 157 + 158 + // Extract just the tag text (without the hash symbol) 159 + // Normalize to lowercase for case-insensitive tag matching 160 + let tag = std::str::from_utf8(tag_match.as_bytes()) 161 + .unwrap_or_default() 162 + .to_lowercase(); 163 + 164 + // Only include tags that are not purely numeric 165 + if !tag.chars().all(|c| c.is_ascii_digit()) { 166 + spans.push(TagSpan { start, end, tag }); 167 + } 168 + } 169 + } 170 + 171 + spans 172 + } 173 + 174 + /// Parse facets from text and return a vector of Facet objects. 175 + /// 176 + /// This function extracts mentions, URLs, and hashtags from the provided text 177 + /// and creates AT Protocol facets with proper byte indices. 178 + /// 179 + /// Mentions are resolved to actual DIDs using the provided identity resolver. 180 + /// If a handle cannot be resolved to a DID, the mention facet is skipped. 181 + /// 182 + /// # Arguments 183 + /// * `text` - The text to extract facets from 184 + /// * `identity_resolver` - Resolver for converting handles to DIDs 185 + /// * `limits` - Configuration for maximum facets per type and total 186 + /// 187 + /// # Returns 188 + /// Optional vector of facets. Returns None if no facets were found. 189 + pub async fn parse_facets_from_text( 190 + text: &str, 191 + identity_resolver: &dyn IdentityResolver, 192 + limits: &FacetLimits, 193 + ) -> Option<Vec<Facet>> { 194 + let mut facets = Vec::new(); 195 + 196 + // Parse mentions (limited by mentions_max) 197 + let mention_spans = parse_mentions(text); 198 + let mut mention_count = 0; 199 + for mention in mention_spans { 200 + if mention_count >= limits.mentions_max { 201 + break; 202 + } 203 + 204 + // Try to resolve the handle to a DID 205 + // First try with at:// prefix, then without 206 + let at_uri = format!("at://{}", mention.handle); 207 + let did_result = match identity_resolver.resolve(&at_uri).await { 208 + Ok(doc) => Ok(doc), 209 + Err(_) => identity_resolver.resolve(&mention.handle).await, 210 + }; 211 + 212 + // Only add the mention facet if we successfully resolved the DID 213 + if let Ok(did_doc) = did_result { 214 + facets.push(Facet { 215 + index: ByteSlice { 216 + byte_start: mention.start, 217 + byte_end: mention.end, 218 + }, 219 + features: vec![FacetFeature::Mention(Mention { 220 + did: did_doc.id.to_string(), 221 + })], 222 + }); 223 + mention_count += 1; 224 + } 225 + // If resolution fails, we skip this mention facet entirely 226 + } 227 + 228 + // Parse URLs (limited by links_max) 229 + let url_spans = parse_urls(text); 230 + for (idx, url) in url_spans.into_iter().enumerate() { 231 + if idx >= limits.links_max { 232 + break; 233 + } 234 + facets.push(Facet { 235 + index: ByteSlice { 236 + byte_start: url.start, 237 + byte_end: url.end, 238 + }, 239 + features: vec![FacetFeature::Link(Link { uri: url.url })], 240 + }); 241 + } 242 + 243 + // Parse hashtags (limited by tags_max) 244 + let tag_spans = parse_tags(text); 245 + for (idx, tag_span) in tag_spans.into_iter().enumerate() { 246 + if idx >= limits.tags_max { 247 + break; 248 + } 249 + facets.push(Facet { 250 + index: ByteSlice { 251 + byte_start: tag_span.start, 252 + byte_end: tag_span.end, 253 + }, 254 + features: vec![FacetFeature::Tag(Tag { tag: tag_span.tag })], 255 + }); 256 + } 257 + 258 + // Apply global facet limit (truncate if exceeds max) 259 + if facets.len() > limits.max { 260 + facets.truncate(limits.max); 261 + } 262 + 263 + // Only return facets if we found any 264 + if !facets.is_empty() { 265 + Some(facets) 266 + } else { 267 + None 268 + } 269 + } 270 + 271 + /// HTML escape helper function 272 + fn html_escape(text: &str) -> String { 273 + text.chars() 274 + .map(|c| match c { 275 + '&' => "&amp;".to_string(), 276 + '<' => "&lt;".to_string(), 277 + '>' => "&gt;".to_string(), 278 + '"' => "&quot;".to_string(), 279 + '\'' => "&#39;".to_string(), 280 + c => c.to_string(), 281 + }) 282 + .collect() 283 + } 284 + 285 + /// Check if text contains HTML tags 286 + /// This is used to detect potentially malicious content 287 + fn contains_html_tags(text: &str) -> bool { 288 + // Look for patterns that indicate HTML tags 289 + // We're looking for < followed by either a letter, /, or ! 290 + let mut chars = text.chars().peekable(); 291 + while let Some(ch) = chars.next() { 292 + if ch == '<' 293 + && let Some(&next_ch) = chars.peek() 294 + { 295 + // Check if this looks like an HTML tag 296 + if next_ch.is_ascii_alphabetic() || next_ch == '/' || next_ch == '!' { 297 + return true; 298 + } 299 + } 300 + } 301 + false 302 + } 303 + 304 + /// Render text with facets as HTML. 305 + /// 306 + /// This function converts plain text with facet annotations into HTML with proper 307 + /// links for mentions, URLs, and hashtags based on the facet information. 308 + /// 309 + /// # HTML Output 310 + /// - Mentions: `<a href="/u/[did]">@handle</a>` 311 + /// - Links: `<a href="[url]" target="_blank" rel="noopener noreferrer">[url]</a>` 312 + /// - Tags: `<a href="/t/[tag]">#tag</a>` 313 + /// - Regular text is HTML-escaped for security 314 + /// 315 + /// # Arguments 316 + /// * `text` - The plain text content 317 + /// * `facets` - Optional facets to apply to the text 318 + /// * `limits` - Configuration for maximum facets per type and total 319 + /// 320 + /// # Returns 321 + /// HTML string with facets rendered as links 322 + pub fn render_text_with_facets_html( 323 + text: &str, 324 + facets: Option<&Vec<Facet>>, 325 + limits: &FacetLimits, 326 + ) -> String { 327 + // First, check if the text contains HTML tags 328 + // If it does, treat it as suspicious and just clean it without applying facets 329 + if contains_html_tags(text) { 330 + // Use ammonia to strip ALL HTML and return plain text 331 + let cleaned = ammonia::clean(text); 332 + // Convert newlines to <br> tags after cleaning 333 + return cleaned.replace('\n', "<br>"); 334 + } 335 + 336 + let text_bytes = text.as_bytes(); 337 + 338 + // If no facets, just return escaped text 339 + let Some(facets) = facets else { 340 + return html_escape(text); 341 + }; 342 + 343 + // Sort facets by start position to process them in order 344 + let mut sorted_facets: Vec<_> = facets.iter().collect(); 345 + sorted_facets.sort_by_key(|f| f.index.byte_start); 346 + 347 + // Apply limits: count facets by type and limit total 348 + let mut mention_count = 0; 349 + let mut link_count = 0; 350 + let mut tag_count = 0; 351 + let mut total_count = 0; 352 + 353 + let filtered_facets: Vec<_> = sorted_facets 354 + .into_iter() 355 + .filter(|facet| { 356 + if total_count >= limits.max { 357 + return false; 358 + } 359 + 360 + // Check facet type and apply per-type limits 361 + let should_include = facet.features.first().map_or(false, |feature| { 362 + match feature { 363 + FacetFeature::Mention(_) if mention_count < limits.mentions_max => { 364 + mention_count += 1; 365 + true 366 + } 367 + FacetFeature::Link(_) if link_count < limits.links_max => { 368 + link_count += 1; 369 + true 370 + } 371 + FacetFeature::Tag(_) if tag_count < limits.tags_max => { 372 + tag_count += 1; 373 + true 374 + } 375 + _ => false, 376 + } 377 + }); 378 + 379 + if should_include { 380 + total_count += 1; 381 + } 382 + 383 + should_include 384 + }) 385 + .collect(); 386 + 387 + let mut html = String::new(); 388 + let mut last_end = 0; 389 + 390 + for facet in filtered_facets { 391 + // Add any text before this facet (HTML-escaped) 392 + if facet.index.byte_start > last_end { 393 + let text_before = std::str::from_utf8(&text_bytes[last_end..facet.index.byte_start]) 394 + .unwrap_or(""); 395 + html.push_str(&html_escape(text_before)); 396 + } 397 + 398 + // Get the text covered by this facet 399 + let facet_text = 400 + std::str::from_utf8(&text_bytes[facet.index.byte_start..facet.index.byte_end]) 401 + .unwrap_or(""); 402 + 403 + // Process the facet based on its feature type 404 + // Only process the first feature (in practice, there should only be one per facet) 405 + if let Some(feature) = facet.features.first() { 406 + match feature { 407 + FacetFeature::Mention(mention) => { 408 + write!( 409 + &mut html, 410 + r#"<a href="/u/{}">{}</a>"#, 411 + html_escape(&mention.did), 412 + html_escape(facet_text) 413 + ) 414 + .unwrap(); 415 + } 416 + FacetFeature::Link(link) => { 417 + // Only create link tags for safe URLs 418 + if link.uri.starts_with("http://") 419 + || link.uri.starts_with("https://") 420 + || link.uri.starts_with("/") 421 + { 422 + write!( 423 + &mut html, 424 + r#"<a href="{}" target="_blank" rel="noopener noreferrer">{}</a>"#, 425 + html_escape(&link.uri), 426 + html_escape(facet_text) 427 + ) 428 + .unwrap(); 429 + } else { 430 + // For unsafe URLs (like javascript:), just render as plain text 431 + html.push_str(&html_escape(facet_text)); 432 + } 433 + } 434 + FacetFeature::Tag(tag) => { 435 + // URL-encode the tag for the href attribute 436 + let encoded_tag = urlencoding::encode(&tag.tag); 437 + write!( 438 + &mut html, 439 + r#"<a href="/t/{}">{}</a>"#, 440 + encoded_tag, 441 + html_escape(facet_text) 442 + ) 443 + .unwrap(); 444 + } 445 + } 446 + } 447 + 448 + last_end = facet.index.byte_end; 449 + } 450 + 451 + // Add any remaining text after the last facet 452 + if last_end < text_bytes.len() { 453 + let remaining_text = std::str::from_utf8(&text_bytes[last_end..]).unwrap_or(""); 454 + html.push_str(&html_escape(remaining_text)); 455 + } 456 + 457 + // Sanitize the final HTML output to ensure safety 458 + // Configure ammonia to only allow <a> tags with specific attributes 459 + let mut builder = ammonia::Builder::new(); 460 + builder 461 + .tags(std::collections::HashSet::from(["a", "br"])) 462 + // Don't automatically add rel="nofollow" - we'll handle it in the attribute filter 463 + .link_rel(None) 464 + // Allow relative URLs (for internal links like /u/... and /t/...) 465 + .url_relative(ammonia::UrlRelative::PassThrough) 466 + .attribute_filter(|element, attribute, value| match (element, attribute) { 467 + ("a", "href") => { 468 + // Only allow safe URLs: relative paths starting with /, or http(s) URLs 469 + if value.starts_with('/') 470 + || value.starts_with("http://") 471 + || value.starts_with("https://") 472 + { 473 + Some(value.into()) 474 + } else { 475 + None 476 + } 477 + } 478 + ("a", "target") => { 479 + if value == "_blank" { 480 + Some(value.into()) 481 + } else { 482 + None 483 + } 484 + } 485 + ("a", "rel") => { 486 + // For external links, ensure nofollow is present 487 + if value.contains("noopener") || value.contains("noreferrer") { 488 + // Keep the existing rel value but add nofollow if not present 489 + if !value.contains("nofollow") { 490 + Some(format!("{} nofollow", value).into()) 491 + } else { 492 + Some(value.into()) 493 + } 494 + } else { 495 + // Just nofollow for other cases 496 + Some("nofollow".into()) 497 + } 498 + } 499 + ("br", _) => None, // br tags don't have attributes 500 + _ => None, 501 + }); 502 + 503 + builder.clean(&html).to_string() 504 + } 505 + 506 + #[cfg(test)] 507 + mod tests { 508 + use atproto_identity::model::Document; 509 + use atproto_record::lexicon::app::bsky::richtext::facet::{ByteSlice, Link, Mention, Tag}; 510 + use async_trait::async_trait; 511 + use std::collections::HashMap; 512 + 513 + use super::*; 514 + 515 + /// Mock identity resolver for testing 516 + struct MockIdentityResolver { 517 + handles_to_dids: HashMap<String, String>, 518 + } 519 + 520 + impl MockIdentityResolver { 521 + fn new() -> Self { 522 + let mut handles_to_dids = HashMap::new(); 523 + handles_to_dids.insert( 524 + "alice.bsky.social".to_string(), 525 + "did:plc:alice123".to_string(), 526 + ); 527 + handles_to_dids.insert( 528 + "at://alice.bsky.social".to_string(), 529 + "did:plc:alice123".to_string(), 530 + ); 531 + Self { handles_to_dids } 532 + } 533 + 534 + fn add_identity(&mut self, handle: &str, did: &str) { 535 + self.handles_to_dids 536 + .insert(handle.to_string(), did.to_string()); 537 + self.handles_to_dids 538 + .insert(format!("at://{}", handle), did.to_string()); 539 + } 540 + } 541 + 542 + #[async_trait] 543 + impl IdentityResolver for MockIdentityResolver { 544 + async fn resolve(&self, handle: &str) -> anyhow::Result<Document> { 545 + let handle_key = if handle.starts_with("at://") { 546 + handle.to_string() 547 + } else { 548 + handle.to_string() 549 + }; 550 + 551 + if let Some(did) = self.handles_to_dids.get(&handle_key) { 552 + Ok(Document { 553 + context: vec![], 554 + id: did.clone(), 555 + also_known_as: vec![format!("at://{}", handle_key.trim_start_matches("at://"))], 556 + verification_method: vec![], 557 + service: vec![], 558 + extra: HashMap::new(), 559 + }) 560 + } else { 561 + Err(anyhow::anyhow!("Handle not found")) 562 + } 563 + } 564 + } 565 + 566 + #[test] 567 + fn test_html_escape() { 568 + assert_eq!(html_escape("Hello & <world>"), "Hello &amp; &lt;world&gt;"); 569 + assert_eq!( 570 + html_escape("\"quotes\" and 'apostrophes'"), 571 + "&quot;quotes&quot; and &#39;apostrophes&#39;" 572 + ); 573 + assert_eq!(html_escape("Line 1\nLine 2"), "Line 1\nLine 2"); 574 + assert_eq!(html_escape("Normal text"), "Normal text"); 575 + } 576 + 577 + #[test] 578 + fn test_render_no_facets() { 579 + let text = "This is a <test> description & it's great!"; 580 + let limits = FacetLimits::default(); 581 + let html = render_text_with_facets_html(text, None, &limits); 582 + // HTML tags are detected and stripped by ammonia 583 + // The <test> tag is removed entirely 584 + assert_eq!(html, "This is a description &amp; it's great!"); 585 + } 586 + 587 + #[test] 588 + fn test_render_with_html_tags() { 589 + let text = "Check this <script>alert('XSS')</script> content!"; 590 + let limits = FacetLimits::default(); 591 + let html = render_text_with_facets_html(text, None, &limits); 592 + // The script tag should be completely removed 593 + assert_eq!(html, "Check this content!"); 594 + assert!(!html.contains("script")); 595 + assert!(!html.contains("alert")); 596 + } 597 + 598 + #[test] 599 + fn test_render_with_mention() { 600 + let text = "Contact @alice.bsky.social for details"; 601 + let limits = FacetLimits::default(); 602 + let facets = vec![Facet { 603 + index: ByteSlice { 604 + byte_start: 8, 605 + byte_end: 26, 606 + }, 607 + features: vec![FacetFeature::Mention(Mention { 608 + did: "did:plc:abc123".to_string(), 609 + })], 610 + }]; 611 + 612 + let html = render_text_with_facets_html(text, Some(&facets), &limits); 613 + assert_eq!( 614 + html, 615 + r#"Contact <a href="/u/did:plc:abc123">@alice.bsky.social</a> for details"# 616 + ); 617 + } 618 + 619 + #[test] 620 + fn test_render_with_link() { 621 + let text = "Apply at https://example.com today!"; 622 + let limits = FacetLimits::default(); 623 + let facets = vec![Facet { 624 + index: ByteSlice { 625 + byte_start: 9, 626 + byte_end: 28, 627 + }, 628 + features: vec![FacetFeature::Link(Link { 629 + uri: "https://example.com".to_string(), 630 + })], 631 + }]; 632 + 633 + let html = render_text_with_facets_html(text, Some(&facets), &limits); 634 + assert_eq!( 635 + html, 636 + r#"Apply at <a href="https://example.com">https://example.com</a> today!"# 637 + ); 638 + } 639 + 640 + #[test] 641 + fn test_render_with_tag() { 642 + let text = "Looking for #rust developers"; 643 + let limits = FacetLimits::default(); 644 + let facets = vec![Facet { 645 + index: ByteSlice { 646 + byte_start: 12, 647 + byte_end: 17, 648 + }, 649 + features: vec![FacetFeature::Tag(Tag { 650 + tag: "rust".to_string(), 651 + })], 652 + }]; 653 + 654 + let html = render_text_with_facets_html(text, Some(&facets), &limits); 655 + assert_eq!( 656 + html, 657 + r#"Looking for <a href="/t/rust">#rust</a> developers"# 658 + ); 659 + } 660 + 661 + #[tokio::test] 662 + async fn test_parse_facets_from_text_comprehensive() { 663 + let mut resolver = MockIdentityResolver::new(); 664 + resolver.add_identity("bob.test.com", "did:plc:bob456"); 665 + 666 + let limits = FacetLimits::default(); 667 + let text = "Join @alice.bsky.social and @bob.test.com at https://example.com #rust #golang"; 668 + let facets = parse_facets_from_text(text, &resolver, &limits).await; 669 + 670 + assert!(facets.is_some()); 671 + let facets = facets.unwrap(); 672 + assert_eq!(facets.len(), 5); // 2 mentions, 1 URL, 2 hashtags 673 + 674 + // Check first mention 675 + assert_eq!(facets[0].index.byte_start, 5); 676 + assert_eq!(facets[0].index.byte_end, 23); 677 + if let FacetFeature::Mention(ref mention) = facets[0].features[0] { 678 + assert_eq!(mention.did, "did:plc:alice123"); 679 + } else { 680 + panic!("Expected Mention feature"); 681 + } 682 + 683 + // Check second mention 684 + assert_eq!(facets[1].index.byte_start, 28); 685 + assert_eq!(facets[1].index.byte_end, 41); 686 + if let FacetFeature::Mention(ref mention) = facets[1].features[0] { 687 + assert_eq!(mention.did, "did:plc:bob456"); 688 + } else { 689 + panic!("Expected Mention feature"); 690 + } 691 + 692 + // Check URL 693 + assert_eq!(facets[2].index.byte_start, 45); 694 + assert_eq!(facets[2].index.byte_end, 64); 695 + if let FacetFeature::Link(ref link) = facets[2].features[0] { 696 + assert_eq!(link.uri, "https://example.com"); 697 + } else { 698 + panic!("Expected Link feature"); 699 + } 700 + 701 + // Check first hashtag 702 + assert_eq!(facets[3].index.byte_start, 65); 703 + assert_eq!(facets[3].index.byte_end, 70); 704 + if let FacetFeature::Tag(ref tag) = facets[3].features[0] { 705 + assert_eq!(tag.tag, "rust"); 706 + } else { 707 + panic!("Expected Tag feature"); 708 + } 709 + 710 + // Check second hashtag 711 + assert_eq!(facets[4].index.byte_start, 71); 712 + assert_eq!(facets[4].index.byte_end, 78); 713 + if let FacetFeature::Tag(ref tag) = facets[4].features[0] { 714 + assert_eq!(tag.tag, "golang"); 715 + } else { 716 + panic!("Expected Tag feature"); 717 + } 718 + } 719 + 720 + #[tokio::test] 721 + async fn test_parse_facets_from_text_with_unresolvable_mention() { 722 + let resolver = MockIdentityResolver::new(); 723 + let limits = FacetLimits::default(); 724 + 725 + // Only alice.bsky.social is in the resolver, not unknown.handle.com 726 + let text = "Contact @unknown.handle.com for details #rust"; 727 + let facets = parse_facets_from_text(text, &resolver, &limits).await; 728 + 729 + assert!(facets.is_some()); 730 + let facets = facets.unwrap(); 731 + // Should only have 1 facet (the hashtag) since the mention couldn't be resolved 732 + assert_eq!(facets.len(), 1); 733 + 734 + // Check that it's the hashtag facet 735 + if let FacetFeature::Tag(ref tag) = facets[0].features[0] { 736 + assert_eq!(tag.tag, "rust"); 737 + } else { 738 + panic!("Expected Tag feature"); 739 + } 740 + } 741 + 742 + #[tokio::test] 743 + async fn test_parse_facets_from_text_empty() { 744 + let resolver = MockIdentityResolver::new(); 745 + let limits = FacetLimits::default(); 746 + let text = "No mentions, URLs, or hashtags here"; 747 + let facets = parse_facets_from_text(text, &resolver, &limits).await; 748 + assert!(facets.is_none()); 749 + } 750 + 751 + #[tokio::test] 752 + async fn test_parse_facets_from_text_url_with_at_mention() { 753 + let resolver = MockIdentityResolver::new(); 754 + let limits = FacetLimits::default(); 755 + 756 + // URLs with @ should not create mention facets 757 + let text = "Tangled https://tangled.org/@smokesignal.events"; 758 + let facets = parse_facets_from_text(text, &resolver, &limits).await; 759 + 760 + assert!(facets.is_some()); 761 + let facets = facets.unwrap(); 762 + 763 + // Should have exactly 1 facet (the URL), not 2 (URL + mention) 764 + assert_eq!( 765 + facets.len(), 766 + 1, 767 + "Expected 1 facet (URL only), got {}", 768 + facets.len() 769 + ); 770 + 771 + // Verify it's a link facet, not a mention 772 + if let FacetFeature::Link(ref link) = facets[0].features[0] { 773 + assert_eq!(link.uri, "https://tangled.org/@smokesignal.events"); 774 + } else { 775 + panic!("Expected Link feature, got Mention or Tag instead"); 776 + } 777 + } 778 + 779 + #[tokio::test] 780 + async fn test_parse_facets_with_mention_limit() { 781 + let mut resolver = MockIdentityResolver::new(); 782 + resolver.add_identity("bob.test.com", "did:plc:bob456"); 783 + resolver.add_identity("charlie.test.com", "did:plc:charlie789"); 784 + 785 + // Limit to 2 mentions 786 + let limits = FacetLimits { 787 + mentions_max: 2, 788 + tags_max: 5, 789 + links_max: 5, 790 + max: 10, 791 + }; 792 + 793 + let text = "Join @alice.bsky.social @bob.test.com @charlie.test.com"; 794 + let facets = parse_facets_from_text(text, &resolver, &limits).await; 795 + 796 + assert!(facets.is_some()); 797 + let facets = facets.unwrap(); 798 + // Should only have 2 mentions (alice and bob), charlie should be skipped 799 + assert_eq!(facets.len(), 2); 800 + 801 + // Verify they're both mentions 802 + for facet in &facets { 803 + assert!(matches!(facet.features[0], FacetFeature::Mention(_))); 804 + } 805 + } 806 + 807 + #[tokio::test] 808 + async fn test_parse_facets_with_global_limit() { 809 + let mut resolver = MockIdentityResolver::new(); 810 + resolver.add_identity("bob.test.com", "did:plc:bob456"); 811 + 812 + // Very restrictive global limit 813 + let limits = FacetLimits { 814 + mentions_max: 5, 815 + tags_max: 5, 816 + links_max: 5, 817 + max: 3, // Only allow 3 total facets 818 + }; 819 + 820 + let text = "Join @alice.bsky.social @bob.test.com at https://example.com #rust #golang #python"; 821 + let facets = parse_facets_from_text(text, &resolver, &limits).await; 822 + 823 + assert!(facets.is_some()); 824 + let facets = facets.unwrap(); 825 + // Should be truncated to 3 facets total 826 + assert_eq!(facets.len(), 3); 827 + } 828 + 829 + #[test] 830 + fn test_render_with_facet_limits() { 831 + let text = "Contact @alice @bob @charlie for details"; 832 + let limits = FacetLimits { 833 + mentions_max: 2, // Only render first 2 mentions 834 + tags_max: 5, 835 + links_max: 5, 836 + max: 10, 837 + }; 838 + 839 + let facets = vec![ 840 + Facet { 841 + index: ByteSlice { 842 + byte_start: 8, 843 + byte_end: 14, 844 + }, 845 + features: vec![FacetFeature::Mention(Mention { 846 + did: "did:plc:alice".to_string(), 847 + })], 848 + }, 849 + Facet { 850 + index: ByteSlice { 851 + byte_start: 15, 852 + byte_end: 19, 853 + }, 854 + features: vec![FacetFeature::Mention(Mention { 855 + did: "did:plc:bob".to_string(), 856 + })], 857 + }, 858 + Facet { 859 + index: ByteSlice { 860 + byte_start: 20, 861 + byte_end: 28, 862 + }, 863 + features: vec![FacetFeature::Mention(Mention { 864 + did: "did:plc:charlie".to_string(), 865 + })], 866 + }, 867 + ]; 868 + 869 + let html = render_text_with_facets_html(text, Some(&facets), &limits); 870 + // Should only render first 2 mentions, third should be plain text 871 + assert!(html.contains(r#"<a href="/u/did:plc:alice">@alice</a>"#)); 872 + assert!(html.contains(r#"<a href="/u/did:plc:bob">@bob</a>"#)); 873 + // Charlie should NOT be a link due to mention limit 874 + assert!(!html.contains(r#"<a href="/u/did:plc:charlie">"#)); 875 + } 876 + 877 + #[test] 878 + fn test_render_malicious_link() { 879 + let text = "Visit example.com for details"; 880 + let limits = FacetLimits::default(); 881 + let facets = vec![Facet { 882 + index: ByteSlice { 883 + byte_start: 6, 884 + byte_end: 17, 885 + }, 886 + features: vec![FacetFeature::Link(Link { 887 + uri: "javascript:alert('XSS')".to_string(), 888 + })], 889 + }]; 890 + 891 + let html = render_text_with_facets_html(text, Some(&facets), &limits); 892 + // JavaScript URLs should be blocked 893 + assert!(!html.contains("javascript:")); 894 + assert_eq!(html, "Visit example.com for details"); 895 + } 896 + }
+25 -2
src/http/event_view.rs
··· 42 42 pub name: String, 43 43 pub description: Option<String>, 44 44 pub description_short: Option<String>, 45 + pub description_html: Option<String>, 45 46 46 47 pub count_going: u32, 47 48 pub count_notgoing: u32, ··· 55 56 pub header: Option<(String, String)>, // (cid, alt text) 56 57 } 57 58 58 - impl TryFrom<(Option<&IdentityProfile>, Option<&IdentityProfile>, &Event)> for EventView { 59 + impl TryFrom<(Option<&IdentityProfile>, Option<&IdentityProfile>, &Event, &crate::facets::FacetLimits)> for EventView { 59 60 type Error = anyhow::Error; 60 61 61 62 fn try_from( 62 - (viewer, organizer, event): (Option<&IdentityProfile>, Option<&IdentityProfile>, &Event), 63 + (viewer, organizer, event, facet_limits): (Option<&IdentityProfile>, Option<&IdentityProfile>, &Event, &crate::facets::FacetLimits), 63 64 ) -> Result<Self, Self::Error> { 64 65 // Time zones are used to display date/time values from the perspective 65 66 // of the viewer. The timezone is selected with this priority: ··· 147 148 .as_ref() 148 149 .map(|value| truncate_text(value, 200, Some("...".to_string())).to_string()); 149 150 151 + // Extract facets from the event record and render HTML description 152 + let description_html = if let Some(desc_text) = &description { 153 + // Try to extract facets from the event record's extra fields 154 + let facets = event 155 + .record 156 + .as_object() 157 + .and_then(|obj| obj.get("facets")) 158 + .and_then(|facets_value| { 159 + serde_json::from_value::<Vec<atproto_record::lexicon::app::bsky::richtext::facet::Facet>>(facets_value.clone()).ok() 160 + }); 161 + 162 + // Render the description with facets 163 + Some(crate::facets::render_text_with_facets_html( 164 + desc_text, 165 + facets.as_ref(), 166 + facet_limits, 167 + )) 168 + } else { 169 + None 170 + }; 171 + 150 172 let starts_at_human = starts_at.as_ref().map(|value| { 151 173 value 152 174 .with_timezone(&tz) ··· 209 231 name, 210 232 description, 211 233 description_short, 234 + description_html, 212 235 count_going: 0, 213 236 count_notgoing: 0, 214 237 count_interested: 0,
+26 -4
src/http/handle_create_event.rs
··· 255 255 None => vec![], 256 256 }; 257 257 258 + // Parse facets from description if present 259 + let description = build_event_form 260 + .description 261 + .clone() 262 + .ok_or(CreateEventError::DescriptionNotSet)?; 263 + 264 + let facet_limits = crate::facets::FacetLimits { 265 + mentions_max: web_context.config.facets_mentions_max, 266 + tags_max: web_context.config.facets_tags_max, 267 + links_max: web_context.config.facets_links_max, 268 + max: web_context.config.facets_max, 269 + }; 270 + 271 + let facets = if !description.is_empty() { 272 + crate::facets::parse_facets_from_text( 273 + &description, 274 + web_context.identity_resolver.as_ref(), 275 + &facet_limits, 276 + ) 277 + .await 278 + } else { 279 + None 280 + }; 281 + 258 282 let the_record = Event { 259 283 name: build_event_form 260 284 .name 261 285 .clone() 262 286 .ok_or(CreateEventError::NameNotSet)?, 263 - description: build_event_form 264 - .description 265 - .clone() 266 - .ok_or(CreateEventError::DescriptionNotSet)?, 287 + description, 267 288 created_at: now, 268 289 starts_at, 269 290 ends_at, ··· 272 293 locations, 273 294 uris: links, 274 295 media: Vec::default(), 296 + facets, 275 297 extra: HashMap::default(), 276 298 }; 277 299
+28 -4
src/http/handle_edit_event.rs
··· 549 549 // Extract existing extra fields from the original record 550 550 let extra = community_event.extra.clone(); 551 551 552 + // Parse facets from updated description or preserve existing facets 553 + let description = build_event_form 554 + .description 555 + .clone() 556 + .ok_or(CommonError::FieldRequired)?; 557 + 558 + let facet_limits = crate::facets::FacetLimits { 559 + mentions_max: ctx.web_context.config.facets_mentions_max, 560 + tags_max: ctx.web_context.config.facets_tags_max, 561 + links_max: ctx.web_context.config.facets_links_max, 562 + max: ctx.web_context.config.facets_max, 563 + }; 564 + 565 + let facets = if !description.is_empty() { 566 + // Extract facets from the updated description 567 + crate::facets::parse_facets_from_text( 568 + &description, 569 + ctx.web_context.identity_resolver.as_ref(), 570 + &facet_limits, 571 + ) 572 + .await 573 + } else { 574 + // If description is empty, preserve existing facets 575 + community_event.facets.clone() 576 + }; 577 + 552 578 let updated_record = LexiconCommunityEvent { 553 579 name: build_event_form 554 580 .name 555 581 .clone() 556 582 .ok_or(CommonError::FieldRequired)?, 557 - description: build_event_form 558 - .description 559 - .clone() 560 - .ok_or(CommonError::FieldRequired)?, 583 + description, 561 584 created_at: community_event.created_at, 562 585 starts_at, 563 586 ends_at, ··· 566 589 locations, 567 590 uris, 568 591 media: Vec::default(), 592 + facets, 569 593 extra, // Use the preserved extra fields 570 594 }; 571 595
+8
src/http/handle_profile.rs
··· 416 416 let organizer_handlers = 417 417 hydrate_event_organizers(&ctx.web_context.pool, &events).await?; 418 418 419 + let facet_limits = crate::facets::FacetLimits { 420 + mentions_max: ctx.web_context.config.facets_mentions_max, 421 + tags_max: ctx.web_context.config.facets_tags_max, 422 + links_max: ctx.web_context.config.facets_links_max, 423 + max: ctx.web_context.config.facets_max, 424 + }; 425 + 419 426 let mut events = events 420 427 .iter() 421 428 .filter_map(|event_view| { ··· 424 431 ctx.current_handle.as_ref(), 425 432 organizer_maybe, 426 433 &event_view.event, 434 + &facet_limits, 427 435 )) 428 436 .ok() 429 437 })
+8
src/http/handle_view_event.rs
··· 225 225 } 226 226 }; 227 227 228 + let facet_limits = crate::facets::FacetLimits { 229 + mentions_max: ctx.web_context.config.facets_mentions_max, 230 + tags_max: ctx.web_context.config.facets_tags_max, 231 + links_max: ctx.web_context.config.facets_links_max, 232 + max: ctx.web_context.config.facets_max, 233 + }; 234 + 228 235 EventView::try_from(( 229 236 ctx.current_handle.as_ref(), 230 237 organizer_handle.as_ref(), 231 238 event, 239 + &facet_limits, 232 240 )) 233 241 } 234 242 Err(err) => Err(ViewEventError::EventNotFound(err.to_string()).into()),
+1
src/lib.rs
··· 3 3 pub mod config_errors; 4 4 pub mod consumer; 5 5 pub mod errors; 6 + pub mod facets; 6 7 pub mod http; 7 8 pub mod i18n; 8 9 pub mod key_provider;
+1
src/task_search_indexer.rs
··· 69 69 "handle": { "type": "keyword" }, 70 70 "name": { "type": "text" }, 71 71 "description": { "type": "text" }, 72 + "tags": { "type": "keyword" }, 72 73 "start_time": { "type": "date" }, 73 74 "end_time": { "type": "date" }, 74 75 "created_at": { "type": "date" },