A better Rust ATProto crate

handle lowercase normalization

Orual e45485de 0a7353bc

+70 -21
+70 -21
crates/jacquard-common/src/types/handle.rs
··· 1 1 use crate::types::string::AtStrError; 2 2 use crate::types::{DISALLOWED_TLDS, ends_with}; 3 3 use crate::{CowStr, IntoStatic}; 4 + use alloc::string::{String, ToString}; 5 + use core::fmt; 6 + use core::ops::Deref; 7 + use core::str::FromStr; 4 8 #[cfg(all(not(target_arch = "wasm32"), feature = "std"))] 5 9 use regex::Regex; 6 - #[cfg(target_arch = "wasm32")] 7 - use regex_lite::Regex; 8 10 #[cfg(all(not(target_arch = "wasm32"), not(feature = "std")))] 9 11 use regex_automata::meta::Regex; 12 + #[cfg(target_arch = "wasm32")] 13 + use regex_lite::Regex; 10 14 use serde::{Deserialize, Deserializer, Serialize, de::Error}; 11 - use smol_str::{SmolStr, ToSmolStr}; 12 - use alloc::string::{String, ToString}; 13 - use core::fmt; 14 - use core::ops::Deref; 15 - use core::str::FromStr; 15 + use smol_str::{SmolStr, StrExt}; 16 16 17 17 use super::Lazy; 18 18 ··· 47 47 /// 48 48 /// Accepts (and strips) preceding '@' or 'at://' if present 49 49 pub fn new(handle: &'h str) -> Result<Self, AtStrError> { 50 + if handle.contains(|c: char| c.is_ascii_uppercase()) { 51 + return Self::new_owned(handle); 52 + } 50 53 let stripped = handle 51 54 .strip_prefix("at://") 52 55 .or_else(|| handle.strip_prefix('@')) 53 56 .unwrap_or(handle); 54 - 55 57 if stripped.len() > 253 { 56 58 Err(AtStrError::too_long( 57 59 "handle", ··· 66 68 SmolStr::new_static("invalid"), 67 69 )) 68 70 } else if ends_with(stripped, DISALLOWED_TLDS) { 69 - // speicifically pass this through as it is returned in instances where someone 71 + // specifically pass this through as it is returned in instances where someone 70 72 // has screwed up their handle, and it's awkward to fail so early 71 73 if handle == "handle.invalid" { 72 74 Ok(Self(CowStr::Borrowed(stripped))) ··· 93 95 .strip_prefix("at://") 94 96 .or_else(|| handle.strip_prefix('@')) 95 97 .unwrap_or(handle); 96 - let handle = stripped; 98 + let normalized = stripped.to_lowercase_smolstr(); 99 + let handle = normalized.as_str(); 97 100 if handle.len() > 253 { 98 101 Err(AtStrError::too_long("handle", handle, 253, handle.len())) 99 102 } else if !HANDLE_REGEX.is_match(handle) { ··· 103 106 SmolStr::new_static("invalid"), 104 107 )) 105 108 } else if ends_with(handle, DISALLOWED_TLDS) { 106 - Err(AtStrError::disallowed("handle", handle, DISALLOWED_TLDS)) 109 + // specifically pass this through as it is returned in instances where someone 110 + // has screwed up their handle, and it's awkward to fail so early 111 + if handle == "handle.invalid" { 112 + Ok(Self(CowStr::Owned(normalized))) 113 + } else { 114 + Err(AtStrError::disallowed( 115 + "handle", 116 + normalized.as_str(), 117 + DISALLOWED_TLDS, 118 + )) 119 + } 107 120 } else { 108 - Ok(Self(CowStr::Owned(handle.to_smolstr()))) 121 + Ok(Self(CowStr::Owned(normalized))) 109 122 } 110 123 } 111 124 ··· 115 128 .strip_prefix("at://") 116 129 .or_else(|| handle.strip_prefix('@')) 117 130 .unwrap_or(handle); 118 - let handle = stripped; 131 + 132 + let handle = if handle.contains(|c: char| c.is_ascii_uppercase()) { 133 + stripped.to_lowercase_smolstr() 134 + } else { 135 + SmolStr::new_static(stripped) 136 + }; 119 137 if handle.len() > 253 { 120 - Err(AtStrError::too_long("handle", handle, 253, handle.len())) 121 - } else if !HANDLE_REGEX.is_match(handle) { 138 + Err(AtStrError::too_long("handle", &handle, 253, handle.len())) 139 + } else if !HANDLE_REGEX.is_match(&handle) { 122 140 Err(AtStrError::regex( 123 141 "handle", 124 - handle, 142 + &handle, 125 143 SmolStr::new_static("invalid"), 126 144 )) 127 - } else if ends_with(handle, DISALLOWED_TLDS) { 128 - Err(AtStrError::disallowed("handle", handle, DISALLOWED_TLDS)) 145 + } else if ends_with(&handle, DISALLOWED_TLDS) { 146 + // specifically pass this through as it is returned in instances where someone 147 + // has screwed up their handle, and it's awkward to fail so early 148 + if handle == "handle.invalid" { 149 + Ok(Self(CowStr::Owned(handle))) 150 + } else { 151 + Err(AtStrError::disallowed("handle", stripped, DISALLOWED_TLDS)) 152 + } 129 153 } else { 130 - Ok(Self(CowStr::new_static(handle))) 154 + Ok(Self(CowStr::Owned(handle))) 131 155 } 132 156 } 133 157 ··· 136 160 /// May allocate for a long handle with an at:// or @ prefix, otherwise borrows. 137 161 /// Accepts (and strips) preceding '@' or 'at://' if present 138 162 pub fn new_cow(handle: CowStr<'h>) -> Result<Self, AtStrError> { 163 + if handle.contains(|c: char| c.is_ascii_uppercase()) { 164 + return Self::new_owned(handle); 165 + } 139 166 let handle = if let Some(stripped) = handle.strip_prefix("at://") { 140 167 CowStr::copy_from_str(stripped) 141 168 } else if let Some(stripped) = handle.strip_prefix('@') { ··· 152 179 SmolStr::new_static("invalid"), 153 180 )) 154 181 } else if ends_with(&handle, DISALLOWED_TLDS) { 155 - Err(AtStrError::disallowed("handle", &handle, DISALLOWED_TLDS)) 182 + // specifically pass this through as it is returned in instances where someone 183 + // has screwed up their handle, and it's awkward to fail so early 184 + if handle == "handle.invalid" { 185 + Ok(Self(handle)) 186 + } else { 187 + Err(AtStrError::disallowed( 188 + "handle", 189 + handle.as_str(), 190 + DISALLOWED_TLDS, 191 + )) 192 + } 156 193 } else { 157 194 Ok(Self(handle)) 158 195 } ··· 165 202 /// 166 203 /// Accepts (and strips) preceding '@' or 'at://' if present 167 204 pub fn raw(handle: &'h str) -> Self { 205 + if handle.contains(|c: char| c.is_ascii_uppercase()) { 206 + return Self::new_owned(handle).expect("Invalid handle"); 207 + } 168 208 let stripped = handle 169 209 .strip_prefix("at://") 170 210 .or_else(|| handle.strip_prefix('@')) ··· 175 215 } else if !HANDLE_REGEX.is_match(handle) { 176 216 panic!("Invalid handle") 177 217 } else if ends_with(handle, DISALLOWED_TLDS) { 178 - panic!("top-level domain not allowed in handles") 218 + // specifically pass this through as it is returned in instances where someone 219 + // has screwed up their handle, and it's awkward to fail so early 220 + if handle == "handle.invalid" { 221 + Self(CowStr::Borrowed(stripped)) 222 + } else { 223 + panic!("top-level domain not allowed in handles") 224 + } 179 225 } else { 180 226 Self(CowStr::Borrowed(handle)) 181 227 } ··· 190 236 .strip_prefix("at://") 191 237 .or_else(|| handle.strip_prefix('@')) 192 238 .unwrap_or(handle); 239 + if stripped.contains(|c: char| c.is_ascii_uppercase()) { 240 + return Self(CowStr::Owned(stripped.to_lowercase_smolstr())); 241 + } 193 242 Self(CowStr::Borrowed(stripped)) 194 243 } 195 244