this repo has no description
1use serde_json::Value; 2use thiserror::Error; 3 4#[derive(Debug, Error)] 5pub enum ValidationError { 6 #[error("No $type provided")] 7 MissingType, 8 #[error("Invalid $type: expected {expected}, got {actual}")] 9 TypeMismatch { expected: String, actual: String }, 10 #[error("Missing required field: {0}")] 11 MissingField(String), 12 #[error("Invalid field value at {path}: {message}")] 13 InvalidField { path: String, message: String }, 14 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")] 15 InvalidDatetime { path: String }, 16 #[error("Invalid record: {0}")] 17 InvalidRecord(String), 18 #[error("Unknown record type: {0}")] 19 UnknownType(String), 20} 21 22#[derive(Debug, Clone, Copy, PartialEq, Eq)] 23pub enum ValidationStatus { 24 Valid, 25 Unknown, 26 Invalid, 27} 28 29pub struct RecordValidator { 30 require_lexicon: bool, 31} 32 33impl Default for RecordValidator { 34 fn default() -> Self { 35 Self::new() 36 } 37} 38 39impl RecordValidator { 40 pub fn new() -> Self { 41 Self { 42 require_lexicon: false, 43 } 44 } 45 46 pub fn require_lexicon(mut self, require: bool) -> Self { 47 self.require_lexicon = require; 48 self 49 } 50 51 pub fn validate( 52 &self, 53 record: &Value, 54 collection: &str, 55 ) -> Result<ValidationStatus, ValidationError> { 56 let obj = record 57 .as_object() 58 .ok_or_else(|| ValidationError::InvalidRecord("Record must be an object".to_string()))?; 59 let record_type = obj 60 .get("$type") 61 .and_then(|v| v.as_str()) 62 .ok_or(ValidationError::MissingType)?; 63 if record_type != collection { 64 return Err(ValidationError::TypeMismatch { 65 expected: collection.to_string(), 66 actual: record_type.to_string(), 67 }); 68 } 69 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) { 70 validate_datetime(created_at, "createdAt")?; 71 } 72 match record_type { 73 "app.bsky.feed.post" => self.validate_post(obj)?, 74 "app.bsky.actor.profile" => self.validate_profile(obj)?, 75 "app.bsky.feed.like" => self.validate_like(obj)?, 76 "app.bsky.feed.repost" => self.validate_repost(obj)?, 77 "app.bsky.graph.follow" => self.validate_follow(obj)?, 78 "app.bsky.graph.block" => self.validate_block(obj)?, 79 "app.bsky.graph.list" => self.validate_list(obj)?, 80 "app.bsky.graph.listitem" => self.validate_list_item(obj)?, 81 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?, 82 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?, 83 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?, 84 _ => { 85 if self.require_lexicon { 86 return Err(ValidationError::UnknownType(record_type.to_string())); 87 } 88 return Ok(ValidationStatus::Unknown); 89 } 90 } 91 Ok(ValidationStatus::Valid) 92 } 93 94 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 95 if !obj.contains_key("text") { 96 return Err(ValidationError::MissingField("text".to_string())); 97 } 98 if !obj.contains_key("createdAt") { 99 return Err(ValidationError::MissingField("createdAt".to_string())); 100 } 101 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) { 102 let grapheme_count = text.chars().count(); 103 if grapheme_count > 3000 { 104 return Err(ValidationError::InvalidField { 105 path: "text".to_string(), 106 message: format!("Text exceeds maximum length of 3000 characters (got {})", grapheme_count), 107 }); 108 } 109 } 110 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) { 111 if langs.len() > 3 { 112 return Err(ValidationError::InvalidField { 113 path: "langs".to_string(), 114 message: "Maximum 3 languages allowed".to_string(), 115 }); 116 } 117 } 118 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) { 119 if tags.len() > 8 { 120 return Err(ValidationError::InvalidField { 121 path: "tags".to_string(), 122 message: "Maximum 8 tags allowed".to_string(), 123 }); 124 } 125 for (i, tag) in tags.iter().enumerate() { 126 if let Some(tag_str) = tag.as_str() { 127 if tag_str.len() > 640 { 128 return Err(ValidationError::InvalidField { 129 path: format!("tags/{}", i), 130 message: "Tag exceeds maximum length of 640 bytes".to_string(), 131 }); 132 } 133 } 134 } 135 } 136 Ok(()) 137 } 138 139 fn validate_profile(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 140 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) { 141 let grapheme_count = display_name.chars().count(); 142 if grapheme_count > 640 { 143 return Err(ValidationError::InvalidField { 144 path: "displayName".to_string(), 145 message: format!("Display name exceeds maximum length of 640 characters (got {})", grapheme_count), 146 }); 147 } 148 } 149 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) { 150 let grapheme_count = description.chars().count(); 151 if grapheme_count > 2560 { 152 return Err(ValidationError::InvalidField { 153 path: "description".to_string(), 154 message: format!("Description exceeds maximum length of 2560 characters (got {})", grapheme_count), 155 }); 156 } 157 } 158 Ok(()) 159 } 160 161 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 162 if !obj.contains_key("subject") { 163 return Err(ValidationError::MissingField("subject".to_string())); 164 } 165 if !obj.contains_key("createdAt") { 166 return Err(ValidationError::MissingField("createdAt".to_string())); 167 } 168 self.validate_strong_ref(obj.get("subject"), "subject")?; 169 Ok(()) 170 } 171 172 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 173 if !obj.contains_key("subject") { 174 return Err(ValidationError::MissingField("subject".to_string())); 175 } 176 if !obj.contains_key("createdAt") { 177 return Err(ValidationError::MissingField("createdAt".to_string())); 178 } 179 self.validate_strong_ref(obj.get("subject"), "subject")?; 180 Ok(()) 181 } 182 183 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 184 if !obj.contains_key("subject") { 185 return Err(ValidationError::MissingField("subject".to_string())); 186 } 187 if !obj.contains_key("createdAt") { 188 return Err(ValidationError::MissingField("createdAt".to_string())); 189 } 190 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) { 191 if !subject.starts_with("did:") { 192 return Err(ValidationError::InvalidField { 193 path: "subject".to_string(), 194 message: "Subject must be a DID".to_string(), 195 }); 196 } 197 } 198 Ok(()) 199 } 200 201 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 202 if !obj.contains_key("subject") { 203 return Err(ValidationError::MissingField("subject".to_string())); 204 } 205 if !obj.contains_key("createdAt") { 206 return Err(ValidationError::MissingField("createdAt".to_string())); 207 } 208 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) { 209 if !subject.starts_with("did:") { 210 return Err(ValidationError::InvalidField { 211 path: "subject".to_string(), 212 message: "Subject must be a DID".to_string(), 213 }); 214 } 215 } 216 Ok(()) 217 } 218 219 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 220 if !obj.contains_key("name") { 221 return Err(ValidationError::MissingField("name".to_string())); 222 } 223 if !obj.contains_key("purpose") { 224 return Err(ValidationError::MissingField("purpose".to_string())); 225 } 226 if !obj.contains_key("createdAt") { 227 return Err(ValidationError::MissingField("createdAt".to_string())); 228 } 229 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) { 230 if name.is_empty() || name.len() > 64 { 231 return Err(ValidationError::InvalidField { 232 path: "name".to_string(), 233 message: "Name must be 1-64 characters".to_string(), 234 }); 235 } 236 } 237 Ok(()) 238 } 239 240 fn validate_list_item(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 241 if !obj.contains_key("subject") { 242 return Err(ValidationError::MissingField("subject".to_string())); 243 } 244 if !obj.contains_key("list") { 245 return Err(ValidationError::MissingField("list".to_string())); 246 } 247 if !obj.contains_key("createdAt") { 248 return Err(ValidationError::MissingField("createdAt".to_string())); 249 } 250 Ok(()) 251 } 252 253 fn validate_feed_generator(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 254 if !obj.contains_key("did") { 255 return Err(ValidationError::MissingField("did".to_string())); 256 } 257 if !obj.contains_key("displayName") { 258 return Err(ValidationError::MissingField("displayName".to_string())); 259 } 260 if !obj.contains_key("createdAt") { 261 return Err(ValidationError::MissingField("createdAt".to_string())); 262 } 263 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) { 264 if display_name.is_empty() || display_name.len() > 240 { 265 return Err(ValidationError::InvalidField { 266 path: "displayName".to_string(), 267 message: "displayName must be 1-240 characters".to_string(), 268 }); 269 } 270 } 271 Ok(()) 272 } 273 274 fn validate_threadgate(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 275 if !obj.contains_key("post") { 276 return Err(ValidationError::MissingField("post".to_string())); 277 } 278 if !obj.contains_key("createdAt") { 279 return Err(ValidationError::MissingField("createdAt".to_string())); 280 } 281 Ok(()) 282 } 283 284 fn validate_labeler_service(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 285 if !obj.contains_key("policies") { 286 return Err(ValidationError::MissingField("policies".to_string())); 287 } 288 if !obj.contains_key("createdAt") { 289 return Err(ValidationError::MissingField("createdAt".to_string())); 290 } 291 Ok(()) 292 } 293 294 fn validate_strong_ref(&self, value: Option<&Value>, path: &str) -> Result<(), ValidationError> { 295 let obj = value 296 .and_then(|v| v.as_object()) 297 .ok_or_else(|| ValidationError::InvalidField { 298 path: path.to_string(), 299 message: "Must be a strong reference object".to_string(), 300 })?; 301 if !obj.contains_key("uri") { 302 return Err(ValidationError::MissingField(format!("{}/uri", path))); 303 } 304 if !obj.contains_key("cid") { 305 return Err(ValidationError::MissingField(format!("{}/cid", path))); 306 } 307 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) { 308 if !uri.starts_with("at://") { 309 return Err(ValidationError::InvalidField { 310 path: format!("{}/uri", path), 311 message: "URI must be an at:// URI".to_string(), 312 }); 313 } 314 } 315 Ok(()) 316 } 317} 318 319fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> { 320 if chrono::DateTime::parse_from_rfc3339(value).is_err() { 321 return Err(ValidationError::InvalidDatetime { 322 path: path.to_string(), 323 }); 324 } 325 Ok(()) 326} 327 328pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> { 329 if rkey.is_empty() { 330 return Err(ValidationError::InvalidRecord("Record key cannot be empty".to_string())); 331 } 332 if rkey.len() > 512 { 333 return Err(ValidationError::InvalidRecord("Record key exceeds maximum length of 512".to_string())); 334 } 335 if rkey == "." || rkey == ".." { 336 return Err(ValidationError::InvalidRecord("Record key cannot be '.' or '..'".to_string())); 337 } 338 let valid_chars = rkey.chars().all(|c| { 339 c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~' 340 }); 341 if !valid_chars { 342 return Err(ValidationError::InvalidRecord( 343 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')".to_string() 344 )); 345 } 346 Ok(()) 347} 348 349pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> { 350 if collection.is_empty() { 351 return Err(ValidationError::InvalidRecord("Collection NSID cannot be empty".to_string())); 352 } 353 let parts: Vec<&str> = collection.split('.').collect(); 354 if parts.len() < 3 { 355 return Err(ValidationError::InvalidRecord( 356 "Collection NSID must have at least 3 segments".to_string() 357 )); 358 } 359 for part in &parts { 360 if part.is_empty() { 361 return Err(ValidationError::InvalidRecord( 362 "Collection NSID segments cannot be empty".to_string() 363 )); 364 } 365 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { 366 return Err(ValidationError::InvalidRecord( 367 "Collection NSID segments must be alphanumeric or hyphens".to_string() 368 )); 369 } 370 } 371 Ok(()) 372} 373 374#[cfg(test)] 375mod tests { 376 use super::*; 377 use serde_json::json; 378 379 #[test] 380 fn test_validate_post() { 381 let validator = RecordValidator::new(); 382 let valid_post = json!({ 383 "$type": "app.bsky.feed.post", 384 "text": "Hello, world!", 385 "createdAt": "2024-01-01T00:00:00.000Z" 386 }); 387 assert_eq!( 388 validator.validate(&valid_post, "app.bsky.feed.post").unwrap(), 389 ValidationStatus::Valid 390 ); 391 } 392 393 #[test] 394 fn test_validate_post_missing_text() { 395 let validator = RecordValidator::new(); 396 let invalid_post = json!({ 397 "$type": "app.bsky.feed.post", 398 "createdAt": "2024-01-01T00:00:00.000Z" 399 }); 400 assert!(validator.validate(&invalid_post, "app.bsky.feed.post").is_err()); 401 } 402 403 #[test] 404 fn test_validate_type_mismatch() { 405 let validator = RecordValidator::new(); 406 let record = json!({ 407 "$type": "app.bsky.feed.like", 408 "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."}, 409 "createdAt": "2024-01-01T00:00:00.000Z" 410 }); 411 let result = validator.validate(&record, "app.bsky.feed.post"); 412 assert!(matches!(result, Err(ValidationError::TypeMismatch { .. }))); 413 } 414 415 #[test] 416 fn test_validate_unknown_type() { 417 let validator = RecordValidator::new(); 418 let record = json!({ 419 "$type": "com.example.custom", 420 "data": "test" 421 }); 422 assert_eq!( 423 validator.validate(&record, "com.example.custom").unwrap(), 424 ValidationStatus::Unknown 425 ); 426 } 427 428 #[test] 429 fn test_validate_unknown_type_strict() { 430 let validator = RecordValidator::new().require_lexicon(true); 431 let record = json!({ 432 "$type": "com.example.custom", 433 "data": "test" 434 }); 435 let result = validator.validate(&record, "com.example.custom"); 436 assert!(matches!(result, Err(ValidationError::UnknownType(_)))); 437 } 438 439 #[test] 440 fn test_validate_record_key() { 441 assert!(validate_record_key("valid-key_123").is_ok()); 442 assert!(validate_record_key("3k2n5j2").is_ok()); 443 assert!(validate_record_key(".").is_err()); 444 assert!(validate_record_key("..").is_err()); 445 assert!(validate_record_key("").is_err()); 446 assert!(validate_record_key("invalid/key").is_err()); 447 } 448 449 #[test] 450 fn test_validate_collection_nsid() { 451 assert!(validate_collection_nsid("app.bsky.feed.post").is_ok()); 452 assert!(validate_collection_nsid("com.atproto.repo.record").is_ok()); 453 assert!(validate_collection_nsid("invalid").is_err()); 454 assert!(validate_collection_nsid("a.b").is_err()); 455 assert!(validate_collection_nsid("").is_err()); 456 } 457}