this repo has no description
1use serde_json::Value; 2use thiserror::Error; 3 4#[derive(Debug, Error)] 5pub enum ValidationError { 6 #[error("No $type provided")] 7 MissingType, 8 9 #[error("Invalid $type: expected {expected}, got {actual}")] 10 TypeMismatch { expected: String, actual: String }, 11 12 #[error("Missing required field: {0}")] 13 MissingField(String), 14 15 #[error("Invalid field value at {path}: {message}")] 16 InvalidField { path: String, message: String }, 17 18 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")] 19 InvalidDatetime { path: String }, 20 21 #[error("Invalid record: {0}")] 22 InvalidRecord(String), 23 24 #[error("Unknown record type: {0}")] 25 UnknownType(String), 26} 27 28#[derive(Debug, Clone, Copy, PartialEq, Eq)] 29pub enum ValidationStatus { 30 Valid, 31 Unknown, 32 Invalid, 33} 34 35pub struct RecordValidator { 36 require_lexicon: bool, 37} 38 39impl Default for RecordValidator { 40 fn default() -> Self { 41 Self::new() 42 } 43} 44 45impl RecordValidator { 46 pub fn new() -> Self { 47 Self { 48 require_lexicon: false, 49 } 50 } 51 52 pub fn require_lexicon(mut self, require: bool) -> Self { 53 self.require_lexicon = require; 54 self 55 } 56 57 pub fn validate( 58 &self, 59 record: &Value, 60 collection: &str, 61 ) -> Result<ValidationStatus, ValidationError> { 62 let obj = record 63 .as_object() 64 .ok_or_else(|| ValidationError::InvalidRecord("Record must be an object".to_string()))?; 65 66 let record_type = obj 67 .get("$type") 68 .and_then(|v| v.as_str()) 69 .ok_or(ValidationError::MissingType)?; 70 71 if record_type != collection { 72 return Err(ValidationError::TypeMismatch { 73 expected: collection.to_string(), 74 actual: record_type.to_string(), 75 }); 76 } 77 78 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) { 79 validate_datetime(created_at, "createdAt")?; 80 } 81 82 match record_type { 83 "app.bsky.feed.post" => self.validate_post(obj)?, 84 "app.bsky.actor.profile" => self.validate_profile(obj)?, 85 "app.bsky.feed.like" => self.validate_like(obj)?, 86 "app.bsky.feed.repost" => self.validate_repost(obj)?, 87 "app.bsky.graph.follow" => self.validate_follow(obj)?, 88 "app.bsky.graph.block" => self.validate_block(obj)?, 89 "app.bsky.graph.list" => self.validate_list(obj)?, 90 "app.bsky.graph.listitem" => self.validate_list_item(obj)?, 91 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?, 92 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?, 93 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?, 94 _ => { 95 if self.require_lexicon { 96 return Err(ValidationError::UnknownType(record_type.to_string())); 97 } 98 return Ok(ValidationStatus::Unknown); 99 } 100 } 101 102 Ok(ValidationStatus::Valid) 103 } 104 105 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 106 if !obj.contains_key("text") { 107 return Err(ValidationError::MissingField("text".to_string())); 108 } 109 110 if !obj.contains_key("createdAt") { 111 return Err(ValidationError::MissingField("createdAt".to_string())); 112 } 113 114 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) { 115 let grapheme_count = text.chars().count(); 116 if grapheme_count > 3000 { 117 return Err(ValidationError::InvalidField { 118 path: "text".to_string(), 119 message: format!("Text exceeds maximum length of 3000 characters (got {})", grapheme_count), 120 }); 121 } 122 } 123 124 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) { 125 if langs.len() > 3 { 126 return Err(ValidationError::InvalidField { 127 path: "langs".to_string(), 128 message: "Maximum 3 languages allowed".to_string(), 129 }); 130 } 131 } 132 133 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) { 134 if tags.len() > 8 { 135 return Err(ValidationError::InvalidField { 136 path: "tags".to_string(), 137 message: "Maximum 8 tags allowed".to_string(), 138 }); 139 } 140 for (i, tag) in tags.iter().enumerate() { 141 if let Some(tag_str) = tag.as_str() { 142 if tag_str.len() > 640 { 143 return Err(ValidationError::InvalidField { 144 path: format!("tags/{}", i), 145 message: "Tag exceeds maximum length of 640 bytes".to_string(), 146 }); 147 } 148 } 149 } 150 } 151 152 Ok(()) 153 } 154 155 fn validate_profile(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 156 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) { 157 let grapheme_count = display_name.chars().count(); 158 if grapheme_count > 640 { 159 return Err(ValidationError::InvalidField { 160 path: "displayName".to_string(), 161 message: format!("Display name exceeds maximum length of 640 characters (got {})", grapheme_count), 162 }); 163 } 164 } 165 166 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) { 167 let grapheme_count = description.chars().count(); 168 if grapheme_count > 2560 { 169 return Err(ValidationError::InvalidField { 170 path: "description".to_string(), 171 message: format!("Description exceeds maximum length of 2560 characters (got {})", grapheme_count), 172 }); 173 } 174 } 175 176 Ok(()) 177 } 178 179 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 180 if !obj.contains_key("subject") { 181 return Err(ValidationError::MissingField("subject".to_string())); 182 } 183 if !obj.contains_key("createdAt") { 184 return Err(ValidationError::MissingField("createdAt".to_string())); 185 } 186 self.validate_strong_ref(obj.get("subject"), "subject")?; 187 Ok(()) 188 } 189 190 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 191 if !obj.contains_key("subject") { 192 return Err(ValidationError::MissingField("subject".to_string())); 193 } 194 if !obj.contains_key("createdAt") { 195 return Err(ValidationError::MissingField("createdAt".to_string())); 196 } 197 self.validate_strong_ref(obj.get("subject"), "subject")?; 198 Ok(()) 199 } 200 201 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 202 if !obj.contains_key("subject") { 203 return Err(ValidationError::MissingField("subject".to_string())); 204 } 205 if !obj.contains_key("createdAt") { 206 return Err(ValidationError::MissingField("createdAt".to_string())); 207 } 208 209 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) { 210 if !subject.starts_with("did:") { 211 return Err(ValidationError::InvalidField { 212 path: "subject".to_string(), 213 message: "Subject must be a DID".to_string(), 214 }); 215 } 216 } 217 218 Ok(()) 219 } 220 221 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 222 if !obj.contains_key("subject") { 223 return Err(ValidationError::MissingField("subject".to_string())); 224 } 225 if !obj.contains_key("createdAt") { 226 return Err(ValidationError::MissingField("createdAt".to_string())); 227 } 228 229 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) { 230 if !subject.starts_with("did:") { 231 return Err(ValidationError::InvalidField { 232 path: "subject".to_string(), 233 message: "Subject must be a DID".to_string(), 234 }); 235 } 236 } 237 238 Ok(()) 239 } 240 241 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 242 if !obj.contains_key("name") { 243 return Err(ValidationError::MissingField("name".to_string())); 244 } 245 if !obj.contains_key("purpose") { 246 return Err(ValidationError::MissingField("purpose".to_string())); 247 } 248 if !obj.contains_key("createdAt") { 249 return Err(ValidationError::MissingField("createdAt".to_string())); 250 } 251 252 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) { 253 if name.is_empty() || name.len() > 64 { 254 return Err(ValidationError::InvalidField { 255 path: "name".to_string(), 256 message: "Name must be 1-64 characters".to_string(), 257 }); 258 } 259 } 260 261 Ok(()) 262 } 263 264 fn validate_list_item(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 265 if !obj.contains_key("subject") { 266 return Err(ValidationError::MissingField("subject".to_string())); 267 } 268 if !obj.contains_key("list") { 269 return Err(ValidationError::MissingField("list".to_string())); 270 } 271 if !obj.contains_key("createdAt") { 272 return Err(ValidationError::MissingField("createdAt".to_string())); 273 } 274 Ok(()) 275 } 276 277 fn validate_feed_generator(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 278 if !obj.contains_key("did") { 279 return Err(ValidationError::MissingField("did".to_string())); 280 } 281 if !obj.contains_key("displayName") { 282 return Err(ValidationError::MissingField("displayName".to_string())); 283 } 284 if !obj.contains_key("createdAt") { 285 return Err(ValidationError::MissingField("createdAt".to_string())); 286 } 287 288 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) { 289 if display_name.is_empty() || display_name.len() > 240 { 290 return Err(ValidationError::InvalidField { 291 path: "displayName".to_string(), 292 message: "displayName must be 1-240 characters".to_string(), 293 }); 294 } 295 } 296 297 Ok(()) 298 } 299 300 fn validate_threadgate(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 301 if !obj.contains_key("post") { 302 return Err(ValidationError::MissingField("post".to_string())); 303 } 304 if !obj.contains_key("createdAt") { 305 return Err(ValidationError::MissingField("createdAt".to_string())); 306 } 307 Ok(()) 308 } 309 310 fn validate_labeler_service(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 311 if !obj.contains_key("policies") { 312 return Err(ValidationError::MissingField("policies".to_string())); 313 } 314 if !obj.contains_key("createdAt") { 315 return Err(ValidationError::MissingField("createdAt".to_string())); 316 } 317 Ok(()) 318 } 319 320 fn validate_strong_ref(&self, value: Option<&Value>, path: &str) -> Result<(), ValidationError> { 321 let obj = value 322 .and_then(|v| v.as_object()) 323 .ok_or_else(|| ValidationError::InvalidField { 324 path: path.to_string(), 325 message: "Must be a strong reference object".to_string(), 326 })?; 327 328 if !obj.contains_key("uri") { 329 return Err(ValidationError::MissingField(format!("{}/uri", path))); 330 } 331 if !obj.contains_key("cid") { 332 return Err(ValidationError::MissingField(format!("{}/cid", path))); 333 } 334 335 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) { 336 if !uri.starts_with("at://") { 337 return Err(ValidationError::InvalidField { 338 path: format!("{}/uri", path), 339 message: "URI must be an at:// URI".to_string(), 340 }); 341 } 342 } 343 344 Ok(()) 345 } 346} 347 348fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> { 349 if chrono::DateTime::parse_from_rfc3339(value).is_err() { 350 return Err(ValidationError::InvalidDatetime { 351 path: path.to_string(), 352 }); 353 } 354 Ok(()) 355} 356 357pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> { 358 if rkey.is_empty() { 359 return Err(ValidationError::InvalidRecord("Record key cannot be empty".to_string())); 360 } 361 362 if rkey.len() > 512 { 363 return Err(ValidationError::InvalidRecord("Record key exceeds maximum length of 512".to_string())); 364 } 365 366 if rkey == "." || rkey == ".." { 367 return Err(ValidationError::InvalidRecord("Record key cannot be '.' or '..'".to_string())); 368 } 369 370 let valid_chars = rkey.chars().all(|c| { 371 c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~' 372 }); 373 374 if !valid_chars { 375 return Err(ValidationError::InvalidRecord( 376 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')".to_string() 377 )); 378 } 379 380 Ok(()) 381} 382 383pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> { 384 if collection.is_empty() { 385 return Err(ValidationError::InvalidRecord("Collection NSID cannot be empty".to_string())); 386 } 387 388 let parts: Vec<&str> = collection.split('.').collect(); 389 if parts.len() < 3 { 390 return Err(ValidationError::InvalidRecord( 391 "Collection NSID must have at least 3 segments".to_string() 392 )); 393 } 394 395 for part in &parts { 396 if part.is_empty() { 397 return Err(ValidationError::InvalidRecord( 398 "Collection NSID segments cannot be empty".to_string() 399 )); 400 } 401 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { 402 return Err(ValidationError::InvalidRecord( 403 "Collection NSID segments must be alphanumeric or hyphens".to_string() 404 )); 405 } 406 } 407 408 Ok(()) 409} 410 411#[cfg(test)] 412mod tests { 413 use super::*; 414 use serde_json::json; 415 416 #[test] 417 fn test_validate_post() { 418 let validator = RecordValidator::new(); 419 420 let valid_post = json!({ 421 "$type": "app.bsky.feed.post", 422 "text": "Hello, world!", 423 "createdAt": "2024-01-01T00:00:00.000Z" 424 }); 425 426 assert_eq!( 427 validator.validate(&valid_post, "app.bsky.feed.post").unwrap(), 428 ValidationStatus::Valid 429 ); 430 } 431 432 #[test] 433 fn test_validate_post_missing_text() { 434 let validator = RecordValidator::new(); 435 436 let invalid_post = json!({ 437 "$type": "app.bsky.feed.post", 438 "createdAt": "2024-01-01T00:00:00.000Z" 439 }); 440 441 assert!(validator.validate(&invalid_post, "app.bsky.feed.post").is_err()); 442 } 443 444 #[test] 445 fn test_validate_type_mismatch() { 446 let validator = RecordValidator::new(); 447 448 let record = json!({ 449 "$type": "app.bsky.feed.like", 450 "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."}, 451 "createdAt": "2024-01-01T00:00:00.000Z" 452 }); 453 454 let result = validator.validate(&record, "app.bsky.feed.post"); 455 assert!(matches!(result, Err(ValidationError::TypeMismatch { .. }))); 456 } 457 458 #[test] 459 fn test_validate_unknown_type() { 460 let validator = RecordValidator::new(); 461 462 let record = json!({ 463 "$type": "com.example.custom", 464 "data": "test" 465 }); 466 467 assert_eq!( 468 validator.validate(&record, "com.example.custom").unwrap(), 469 ValidationStatus::Unknown 470 ); 471 } 472 473 #[test] 474 fn test_validate_unknown_type_strict() { 475 let validator = RecordValidator::new().require_lexicon(true); 476 477 let record = json!({ 478 "$type": "com.example.custom", 479 "data": "test" 480 }); 481 482 let result = validator.validate(&record, "com.example.custom"); 483 assert!(matches!(result, Err(ValidationError::UnknownType(_)))); 484 } 485 486 #[test] 487 fn test_validate_record_key() { 488 assert!(validate_record_key("valid-key_123").is_ok()); 489 assert!(validate_record_key("3k2n5j2").is_ok()); 490 assert!(validate_record_key(".").is_err()); 491 assert!(validate_record_key("..").is_err()); 492 assert!(validate_record_key("").is_err()); 493 assert!(validate_record_key("invalid/key").is_err()); 494 } 495 496 #[test] 497 fn test_validate_collection_nsid() { 498 assert!(validate_collection_nsid("app.bsky.feed.post").is_ok()); 499 assert!(validate_collection_nsid("com.atproto.repo.record").is_ok()); 500 assert!(validate_collection_nsid("invalid").is_err()); 501 assert!(validate_collection_nsid("a.b").is_err()); 502 assert!(validate_collection_nsid("").is_err()); 503 } 504}