i18n+filtering fork - fluent-templates v2
at main 274 lines 9.5 kB view raw
//! # Validation Module
//!
//! Input validation utilities for hostnames and AT Protocol handles.
//!
//! Everything in this module is a pure, synchronous string check: nothing here touches the
//! network, a database, or any other application state.
//!
//! ## Features
//!
//! ### Hostname Validation
//! [`is_valid_hostname`] applies the DNS size limits from RFC 1035 together with the
//! "letters, digits, hyphen" label syntax (a leading digit is accepted, as relaxed by RFC 1123):
//! - Length constraints (max 253 bytes total, 1-63 bytes per label)
//! - Character set validation (ASCII alphanumerics and hyphens, separated by periods)
//! - Label format validation (no leading/trailing hyphens, no empty labels)
//! - Reserved TLD rejection, matched case-insensitively
//!
//! ### AT Protocol Handle Validation
//! [`is_valid_handle`] builds on hostname validation:
//! - Strips one optional `at://` or `@` prefix
//! - Requires at least one period
//! - Rejects reserved domains
//! - Returns the handle normalized to ASCII lowercase
//!
//! ### Out of Scope
//! This module does not resolve handles to DIDs and does not sanitize free-form content
//! (HTML, SQL, uploads, CSRF tokens). Those concerns must be handled by the code that owns them.
//!
//! ## Security Considerations
//!
//! - Every function treats its argument as untrusted input
//! - Invalid input is rejected by returning `false` / `None`, never by panicking
//! - Reserved TLDs are compared case-insensitively so `example.LOCALHOST` cannot slip through
//!
//! ## Example Usage
//!
//! ```rust
//! use smokesignal::validation::{is_valid_hostname, is_valid_handle};
//!
//! // Validate hostnames
//! assert!(is_valid_hostname("example.com"));
//! assert!(is_valid_hostname("subdomain.example.com"));
//! assert!(!is_valid_hostname("invalid-.com"));
//! assert!(!is_valid_hostname(".localhost"));
//!
//! // Validate AT Protocol handles
//! assert!(is_valid_handle("user.bsky.social").is_some());
//! assert_eq!(
//!     is_valid_handle("@Alice.Example.com"),
//!     Some("alice.example.com".to_string())
//! );
//! assert!(is_valid_handle("invalid.localhost").is_none());
//! ```
//!
//! ## Validation Rules
//!
//! ### Hostname Validation
//! - Maximum total length: 253 bytes
//! - Label length: 1-63 bytes
//! - Valid characters: a-z, A-Z, 0-9, hyphen (-), with period (.) as the label separator
//! - Labels cannot start or end with a hyphen
//! - Cannot use reserved TLDs (.localhost, .internal, .arpa, .local), in any letter case
//!
//! ### AT Protocol Handle Validation
//! - Must be a valid hostname (per the rules above) once an `at://` or `@` prefix is removed
//! - Must contain at least one period
//! - Internationalized domain names are accepted only in their ASCII (`xn--`) encoding
//!
//! ## Performance
//!
//! - Each check is a linear pass over the input bytes
//! - [`is_valid_hostname`] performs no heap allocation
//! - [`is_valid_handle`] allocates once, for the normalized `String` it returns

/// Maximum length for a valid hostname as defined in RFC 1035
const MAX_HOSTNAME_LENGTH: usize = 253;

/// Maximum length for a DNS label (component between dots) as defined in RFC 1035
const MAX_LABEL_LENGTH: usize = 63;

/// Reserved top-level domains (lowercase, with leading dot) that are rejected as hostnames
/// and therefore as AT Protocol handles
const RESERVED_TLDS: [&str; 4] = [".localhost", ".internal", ".arpa", ".local"];

/// Validates if a string is a valid hostname.
///
/// A valid hostname must:
/// - Only contain ASCII alphanumeric characters, hyphens, and periods
/// - Not start or end labels with hyphens
/// - Have labels (parts between dots) with length between 1-63 bytes
/// - Have total length not exceeding 253 bytes
/// - Not end in a reserved top-level domain (compared case-insensitively)
///
/// # Arguments
/// * `hostname` - The hostname string to validate
///
/// # Returns
/// * `true` if the hostname is valid, `false` otherwise
#[must_use]
pub fn is_valid_hostname(hostname: &str) -> bool {
    // Cheap length gate first: empty and over-long names never need a byte scan.
    if hostname.is_empty() || hostname.len() > MAX_HOSTNAME_LENGTH {
        return false;
    }

    !has_reserved_tld(hostname)
        && hostname.bytes().all(is_valid_hostname_char)
        && hostname.split('.').all(is_valid_dns_label)
}

/// Checks whether a hostname ends in one of the [`RESERVED_TLDS`].
///
/// DNS names are case-insensitive, so the suffix is compared ignoring ASCII case. The
/// comparison works on bytes, which avoids both an allocation and any risk of slicing a
/// multi-byte character in (not yet validated) input.
///
/// # Arguments
/// * `hostname` - The hostname whose suffix is inspected
///
/// # Returns
/// * `true` if the hostname ends with a reserved TLD, `false` otherwise
fn has_reserved_tld(hostname: &str) -> bool {
    let host = hostname.as_bytes();
    RESERVED_TLDS.iter().any(|tld| {
        let suffix = tld.as_bytes();
        host.len() >= suffix.len()
            && host[host.len() - suffix.len()..].eq_ignore_ascii_case(suffix)
    })
}

/// Checks if a byte is a valid character in a hostname.
///
/// Valid characters are: a-z, A-Z, 0-9, hyphen (-), and period (.)
///
/// # Arguments
/// * `byte` - The byte to check
///
/// # Returns
/// * `true` if the byte is a valid hostname character, `false` otherwise
fn is_valid_hostname_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.')
}

/// Validates a single DNS label (the text between two periods).
///
/// A valid DNS label must:
/// - Not be empty
/// - Not exceed 63 bytes
/// - Not start or end with a hyphen
///
/// The character set is not checked here; [`is_valid_hostname`] does that for the whole name.
///
/// # Arguments
/// * `label` - The DNS label to validate
///
/// # Returns
/// * `true` if the label is valid, `false` otherwise
fn is_valid_dns_label(label: &str) -> bool {
    !label.is_empty()
        && label.len() <= MAX_LABEL_LENGTH
        && !label.starts_with('-')
        && !label.ends_with('-')
}

/// Validates and normalizes an AT Protocol handle.
///
/// A valid AT Protocol handle must:
/// - Be a valid hostname (after stripping any prefix)
/// - Contain at least one period (.)
/// - Can optionally have an "at://" or "@" prefix, which will be removed
///
/// Handles are case-insensitive, so the returned value is folded to ASCII lowercase. This
/// makes two spellings of the same handle compare equal after normalization.
///
/// # Arguments
/// * `handle` - The handle string to validate
///
/// # Returns
/// * `Some(String)` containing the normalized (prefix-free, lowercase) handle if valid
/// * `None` if the handle is invalid
#[must_use]
pub fn is_valid_handle(handle: &str) -> Option<String> {
    let trimmed = strip_handle_prefixes(handle);

    // A bare single-label name (e.g. "localhost") is a hostname but never a handle.
    if trimmed.contains('.') && is_valid_hostname(trimmed) {
        Some(trimmed.to_ascii_lowercase())
    } else {
        None
    }
}

/// Strips common AT Protocol handle prefixes.
///
/// Removes a single leading "at://" or "@" if present. Only the outermost prefix is removed:
/// "at://@example.com" becomes "@example.com".
///
/// # Arguments
/// * `handle` - The handle to strip prefixes from
///
/// # Returns
/// * The handle with the prefix removed, or the input unchanged if it had none
fn strip_handle_prefixes(handle: &str) -> &str {
    handle
        .strip_prefix("at://")
        .or_else(|| handle.strip_prefix('@'))
        .unwrap_or(handle)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_valid_hostnames() {
        // Valid hostnames
        assert!(is_valid_hostname("example.com"));
        assert!(is_valid_hostname("subdomain.example.com"));
        assert!(is_valid_hostname("with-hyphen.example.com"));
        assert!(is_valid_hostname("123numeric.example.com"));
        assert!(is_valid_hostname("xn--bcher-kva.example.com")); // IDN
        assert!(is_valid_hostname("EXAMPLE.COM")); // Case does not affect validity
    }

    #[test]
    fn test_invalid_hostnames() {
        // Invalid hostnames
        assert!(!is_valid_hostname("")); // Empty
        assert!(!is_valid_hostname("a".repeat(254).as_str())); // Too long
        assert!(!is_valid_hostname("example.localhost")); // Reserved TLD
        assert!(!is_valid_hostname("example.internal")); // Reserved TLD
        assert!(!is_valid_hostname("example.arpa")); // Reserved TLD
        assert!(!is_valid_hostname("example.local")); // Reserved TLD
        assert!(!is_valid_hostname("invalid_char.example.com")); // Invalid underscore
        assert!(!is_valid_hostname("-starts-with-hyphen.example.com")); // Label starts with hyphen
        assert!(!is_valid_hostname("ends-with-hyphen-.example.com")); // Label ends with hyphen
        assert!(!is_valid_hostname(&("a".repeat(64) + ".example.com"))); // Label too long
        assert!(!is_valid_hostname(".starts.with.dot")); // Empty label
        assert!(!is_valid_hostname("ends.with.dot.")); // Empty label
        assert!(!is_valid_hostname("double..dot")); // Empty label
    }

    #[test]
    fn test_reserved_tlds_are_case_insensitive() {
        assert!(!is_valid_hostname("example.LOCALHOST"));
        assert!(!is_valid_hostname("example.Internal"));
        assert!(!is_valid_hostname("1.0.0.127.in-addr.ARPA"));
        assert!(!is_valid_hostname("printer.Local"));
        // A reserved word that is not the final label is not a reserved TLD
        assert!(is_valid_hostname("local.example.com"));
    }

    #[test]
    fn test_valid_handles() {
        // Valid handles
        assert_eq!(
            is_valid_handle("user.example.com"),
            Some("user.example.com".to_string())
        );
        assert_eq!(
            is_valid_handle("at://user.example.com"),
            Some("user.example.com".to_string())
        );
        assert_eq!(
            is_valid_handle("@user.example.com"),
            Some("user.example.com".to_string())
        );
    }

    #[test]
    fn test_handles_are_lowercased() {
        assert_eq!(
            is_valid_handle("@Alice.Example.COM"),
            Some("alice.example.com".to_string())
        );
        assert_eq!(
            is_valid_handle("at://USER.bsky.social"),
            Some("user.bsky.social".to_string())
        );
    }

    #[test]
    fn test_invalid_handles() {
        // Invalid handles
        assert_eq!(is_valid_handle("nodots"), None); // No dots
        assert_eq!(is_valid_handle("at://invalid_char.example.com"), None); // Invalid character
        assert_eq!(is_valid_handle("@example.localhost"), None); // Reserved TLD
        assert_eq!(is_valid_handle("@example.LocalHost"), None); // Reserved TLD, mixed case
    }

    #[test]
    fn test_strip_handle_prefixes() {
        assert_eq!(strip_handle_prefixes("example.com"), "example.com");
        assert_eq!(strip_handle_prefixes("at://example.com"), "example.com");
        assert_eq!(strip_handle_prefixes("@example.com"), "example.com");
        // Nested prefixes should only strip the outermost one
        assert_eq!(strip_handle_prefixes("at://@example.com"), "@example.com");
    }
}