use crate::{ atproto::lexicon::{ community::lexicon::calendar::event::NSID, events::smokesignal::calendar::event::NSID as LegacyNSID, }, http::errors::UrlError, }; use regex::Regex; use std::sync::LazyLock; pub type QueryParam<'a> = (&'a str, &'a str); pub type QueryParams<'a> = Vec>; pub fn stringify(query: QueryParams) -> String { query.iter().fold(String::new(), |acc, &tuple| { acc + tuple.0 + "=" + tuple.1 + "&" }) } pub struct URLBuilder { host: String, path: String, params: Vec<(String, String)>, } pub fn build_url(host: &str, path: &str, params: Vec>) -> String { let mut url_builder = URLBuilder::new(host); url_builder.path(path); for (key, value) in params.iter().filter_map(|x| *x) { url_builder.param(key, value); } url_builder.build() } impl URLBuilder { pub fn new(host: &str) -> URLBuilder { let host = if host.starts_with("https://") { host.to_string() } else { format!("https://{}", host) }; let host = if let Some(trimmed) = host.strip_suffix('/') { trimmed.to_string() } else { host }; URLBuilder { host: host.to_string(), params: vec![], path: "/".to_string(), } } pub fn param(&mut self, key: &str, value: &str) -> &mut Self { self.params .push((key.to_owned(), urlencoding::encode(value).to_string())); self } pub fn path(&mut self, path: &str) -> &mut Self { path.clone_into(&mut self.path); self } pub fn build(self) -> String { let mut url_params = String::new(); if !self.params.is_empty() { url_params.push('?'); let qs_args = self.params.iter().map(|(k, v)| (&**k, &**v)).collect(); url_params.push_str(stringify(qs_args).as_str()); } format!("{}{}{}", self.host, self.path, url_params) } } pub fn url_from_aturi(external_base: &str, aturi: &str) -> Result { let aturi = aturi.strip_prefix("at://").unwrap_or(aturi); let parts = aturi.split("/").collect::>(); if parts.len() == 3 && parts[1] == NSID { let path = format!("/{}/{}", parts[0], parts[2]); return Ok(build_url(external_base, &path, vec![])); } if parts.len() == 3 && parts[1] == LegacyNSID { let path = 
format!("/{}/{}", parts[0], parts[2]); return Ok(build_url(external_base, &path, vec![])); } Err(UrlError::UnsupportedCollection) } fn find_char_bytes_len(ch: &char) -> i32 { let mut b = [0; 4]; ch.encode_utf8(&mut b); let mut clen = 0; for a in b.iter() { clen += match a { 0 => 0, _ => 1, } } clen } pub fn truncate_text(text: &str, tlen: usize, suffix: Option) -> String { if text.len() <= tlen { return text.to_string(); } let c = text.chars().nth(tlen); let ret = match c { Some(s) => match char::is_whitespace(s) { true => text.split_at(tlen).0, false => { let chars: Vec<_> = text.chars().collect(); let truncated = chars.split_at(tlen); let mut first_len = 0; for ch in truncated.0.iter() { first_len += find_char_bytes_len(ch); } let mut prev_ws = first_len - 1; for ch in truncated.0.iter().rev() { if char::is_whitespace(*ch) { break; } prev_ws -= find_char_bytes_len(ch); } let mut next_ws = first_len + 1; for ch in truncated.1.iter() { let mut b = [0; 4]; ch.encode_utf8(&mut b); if char::is_whitespace(*ch) { break; } next_ws += find_char_bytes_len(ch); } match next_ws > prev_ws && prev_ws > 0 { true => text.split_at(prev_ws as usize).0, false => text.split_at(next_ws as usize).1, } } }, None => text, }; if ret.len() < text.len() { if let Some(suffix) = suffix { return format!("{} {}", ret, suffix.clone()); } } ret.to_string() } /// Convert a handle to a URL-safe slug format /// /// This function takes a handle (which may be in various formats like `example.com`, /// `@example.com`, `did:web:example.com`, or `did:plc:abc123`) and converts it to /// a URL-safe slug that can be used in URL paths. 
///
/// # Arguments
/// * `handle` - The handle to convert to a slug
///
/// # Returns
/// * A URL-safe slug string
pub fn slug_from_handle(handle: &str) -> String {
    // Strip common prefixes ("at://" or a leading '@') to get the core handle.
    let trimmed = handle
        .strip_prefix("at://")
        .or_else(|| handle.strip_prefix('@'))
        .unwrap_or(handle);

    // did:web: carries a domain after the prefix — use just the domain part.
    // Everything else (did:plc:… or a bare domain handle) is already
    // URL-safe as written and is used verbatim.
    match trimmed.strip_prefix("did:web:") {
        Some(web_handle) => web_handle.to_string(),
        None => trimmed.to_string(),
    }
}

/// Regular expression for matching URLs.
///
/// Compiled once on first use; the character class excludes whitespace and
/// characters that commonly terminate a URL in prose.
static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"https?://[^\s<>\[\]{}|\\^`]+").expect("Failed to compile URL regex")
});

/// Convert URLs in text to clickable HTML links while escaping other HTML content
///
/// This function finds URLs in the input text and converts them to clickable anchor tags
/// while properly escaping any other HTML content to prevent XSS attacks.
/// /// # Arguments /// * `text` - The input text that may contain URLs /// /// # Returns /// * A string with URLs converted to HTML anchor tags and other content HTML-escaped pub fn convert_urls_to_links(text: &str) -> String { tracing::debug!("convert_urls_to_links called with text: {}", text); let mut result = String::new(); let mut last_end = 0; for url_match in URL_REGEX.find_iter(text) { // Add the text before this URL (HTML escaped) let before_url = &text[last_end..url_match.start()]; result.push_str(&html_escape::encode_text(before_url)); // Add the URL as a clickable link let url = url_match.as_str(); let href = if url.starts_with("http://") || url.starts_with("https://") { url.to_string() } else { format!("https://{}", url) }; result.push_str(&format!( r#"{}"#, html_escape::encode_quoted_attribute(&href), html_escape::encode_text(url) )); last_end = url_match.end(); } // Add any remaining text after the last URL (HTML escaped) let remaining = &text[last_end..]; result.push_str(&html_escape::encode_text(remaining)); tracing::debug!("convert_urls_to_links result: {}", result); result }