this repo has no description
1use serde_json::Value; 2use thiserror::Error; 3 4#[derive(Debug, Error)] 5pub enum ValidationError { 6 #[error("No $type provided")] 7 MissingType, 8 #[error("Invalid $type: expected {expected}, got {actual}")] 9 TypeMismatch { expected: String, actual: String }, 10 #[error("Missing required field: {0}")] 11 MissingField(String), 12 #[error("Invalid field value at {path}: {message}")] 13 InvalidField { path: String, message: String }, 14 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")] 15 InvalidDatetime { path: String }, 16 #[error("Invalid record: {0}")] 17 InvalidRecord(String), 18 #[error("Unknown record type: {0}")] 19 UnknownType(String), 20} 21 22#[derive(Debug, Clone, Copy, PartialEq, Eq)] 23pub enum ValidationStatus { 24 Valid, 25 Unknown, 26 Invalid, 27} 28 29pub struct RecordValidator { 30 require_lexicon: bool, 31} 32 33impl Default for RecordValidator { 34 fn default() -> Self { 35 Self::new() 36 } 37} 38 39impl RecordValidator { 40 pub fn new() -> Self { 41 Self { 42 require_lexicon: false, 43 } 44 } 45 46 pub fn require_lexicon(mut self, require: bool) -> Self { 47 self.require_lexicon = require; 48 self 49 } 50 51 pub fn validate( 52 &self, 53 record: &Value, 54 collection: &str, 55 ) -> Result<ValidationStatus, ValidationError> { 56 let obj = record.as_object().ok_or_else(|| { 57 ValidationError::InvalidRecord("Record must be an object".to_string()) 58 })?; 59 let record_type = obj 60 .get("$type") 61 .and_then(|v| v.as_str()) 62 .ok_or(ValidationError::MissingType)?; 63 if record_type != collection { 64 return Err(ValidationError::TypeMismatch { 65 expected: collection.to_string(), 66 actual: record_type.to_string(), 67 }); 68 } 69 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) { 70 validate_datetime(created_at, "createdAt")?; 71 } 72 match record_type { 73 "app.bsky.feed.post" => self.validate_post(obj)?, 74 "app.bsky.actor.profile" => self.validate_profile(obj)?, 75 "app.bsky.feed.like" => self.validate_like(obj)?, 76 "app.bsky.feed.repost" => self.validate_repost(obj)?, 77 "app.bsky.graph.follow" => self.validate_follow(obj)?, 78 "app.bsky.graph.block" => self.validate_block(obj)?, 79 "app.bsky.graph.list" => self.validate_list(obj)?, 80 "app.bsky.graph.listitem" => self.validate_list_item(obj)?, 81 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?, 82 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?, 83 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?, 84 _ => { 85 if self.require_lexicon { 86 return Err(ValidationError::UnknownType(record_type.to_string())); 87 } 88 return Ok(ValidationStatus::Unknown); 89 } 90 } 91 Ok(ValidationStatus::Valid) 92 } 93 94 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 95 if !obj.contains_key("text") { 96 return Err(ValidationError::MissingField("text".to_string())); 97 } 98 if !obj.contains_key("createdAt") { 99 return Err(ValidationError::MissingField("createdAt".to_string())); 100 } 101 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) { 102 let grapheme_count = text.chars().count(); 103 if grapheme_count > 3000 { 104 return Err(ValidationError::InvalidField { 105 path: "text".to_string(), 106 message: format!( 107 "Text exceeds maximum length of 3000 characters (got {})", 108 grapheme_count 109 ), 110 }); 111 } 112 } 113 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) 114 && langs.len() > 3 { 115 return Err(ValidationError::InvalidField { 116 path: "langs".to_string(), 117 message: "Maximum 3 languages allowed".to_string(), 118 }); 119 } 120 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) { 121 if tags.len() > 8 { 122 return Err(ValidationError::InvalidField { 123 path: "tags".to_string(), 124 message: "Maximum 8 tags allowed".to_string(), 125 }); 126 } 127 for (i, tag) in tags.iter().enumerate() { 128 if let Some(tag_str) = tag.as_str() 129 && tag_str.len() > 640 { 130 return Err(ValidationError::InvalidField { 131 path: format!("tags/{}", i), 132 message: "Tag exceeds maximum length of 640 bytes".to_string(), 133 }); 134 } 135 } 136 } 137 Ok(()) 138 } 139 140 fn validate_profile( 141 &self, 142 obj: &serde_json::Map<String, Value>, 143 ) -> Result<(), ValidationError> { 144 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) { 145 let grapheme_count = display_name.chars().count(); 146 if grapheme_count > 640 { 147 return Err(ValidationError::InvalidField { 148 path: "displayName".to_string(), 149 message: format!( 150 "Display name exceeds maximum length of 640 characters (got {})", 151 grapheme_count 152 ), 153 }); 154 } 155 } 156 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) { 157 let grapheme_count = description.chars().count(); 158 if grapheme_count > 2560 { 159 return Err(ValidationError::InvalidField { 160 path: "description".to_string(), 161 message: format!( 162 "Description exceeds maximum length of 2560 characters (got {})", 163 grapheme_count 164 ), 165 }); 166 } 167 } 168 Ok(()) 169 } 170 171 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 172 if !obj.contains_key("subject") { 173 return Err(ValidationError::MissingField("subject".to_string())); 174 } 175 if !obj.contains_key("createdAt") { 176 return Err(ValidationError::MissingField("createdAt".to_string())); 177 } 178 self.validate_strong_ref(obj.get("subject"), "subject")?; 179 Ok(()) 180 } 181 182 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 183 if !obj.contains_key("subject") { 184 return Err(ValidationError::MissingField("subject".to_string())); 185 } 186 if !obj.contains_key("createdAt") { 187 return Err(ValidationError::MissingField("createdAt".to_string())); 188 } 189 self.validate_strong_ref(obj.get("subject"), "subject")?; 190 Ok(()) 191 } 192 193 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 194 if !obj.contains_key("subject") { 195 return Err(ValidationError::MissingField("subject".to_string())); 196 } 197 if !obj.contains_key("createdAt") { 198 return Err(ValidationError::MissingField("createdAt".to_string())); 199 } 200 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) 201 && !subject.starts_with("did:") { 202 return Err(ValidationError::InvalidField { 203 path: "subject".to_string(), 204 message: "Subject must be a DID".to_string(), 205 }); 206 } 207 Ok(()) 208 } 209 210 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 211 if !obj.contains_key("subject") { 212 return Err(ValidationError::MissingField("subject".to_string())); 213 } 214 if !obj.contains_key("createdAt") { 215 return Err(ValidationError::MissingField("createdAt".to_string())); 216 } 217 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) 218 && !subject.starts_with("did:") { 219 return Err(ValidationError::InvalidField { 220 path: "subject".to_string(), 221 message: "Subject must be a DID".to_string(), 222 }); 223 } 224 Ok(()) 225 } 226 227 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 228 if !obj.contains_key("name") { 229 return Err(ValidationError::MissingField("name".to_string())); 230 } 231 if !obj.contains_key("purpose") { 232 return Err(ValidationError::MissingField("purpose".to_string())); 233 } 234 if !obj.contains_key("createdAt") { 235 return Err(ValidationError::MissingField("createdAt".to_string())); 236 } 237 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) 238 && (name.is_empty() || name.len() > 64) { 239 return Err(ValidationError::InvalidField { 240 path: "name".to_string(), 241 message: "Name must be 1-64 characters".to_string(), 242 }); 243 } 244 Ok(()) 245 } 246 247 fn validate_list_item( 248 &self, 249 obj: &serde_json::Map<String, Value>, 250 ) -> Result<(), ValidationError> { 251 if !obj.contains_key("subject") { 252 return Err(ValidationError::MissingField("subject".to_string())); 253 } 254 if !obj.contains_key("list") { 255 return Err(ValidationError::MissingField("list".to_string())); 256 } 257 if !obj.contains_key("createdAt") { 258 return Err(ValidationError::MissingField("createdAt".to_string())); 259 } 260 Ok(()) 261 } 262 263 fn validate_feed_generator( 264 &self, 265 obj: &serde_json::Map<String, Value>, 266 ) -> Result<(), ValidationError> { 267 if !obj.contains_key("did") { 268 return Err(ValidationError::MissingField("did".to_string())); 269 } 270 if !obj.contains_key("displayName") { 271 return Err(ValidationError::MissingField("displayName".to_string())); 272 } 273 if !obj.contains_key("createdAt") { 274 return Err(ValidationError::MissingField("createdAt".to_string())); 275 } 276 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) 277 && (display_name.is_empty() || display_name.len() > 240) { 278 return Err(ValidationError::InvalidField { 279 path: "displayName".to_string(), 280 message: "displayName must be 1-240 characters".to_string(), 281 }); 282 } 283 Ok(()) 284 } 285 286 fn validate_threadgate( 287 &self, 288 obj: &serde_json::Map<String, Value>, 289 ) -> Result<(), ValidationError> { 290 if !obj.contains_key("post") { 291 return Err(ValidationError::MissingField("post".to_string())); 292 } 293 if !obj.contains_key("createdAt") { 294 return Err(ValidationError::MissingField("createdAt".to_string())); 295 } 296 Ok(()) 297 } 298 299 fn validate_labeler_service( 300 &self, 301 obj: &serde_json::Map<String, Value>, 302 ) -> Result<(), ValidationError> { 303 if !obj.contains_key("policies") { 304 return Err(ValidationError::MissingField("policies".to_string())); 305 } 306 if !obj.contains_key("createdAt") { 307 return Err(ValidationError::MissingField("createdAt".to_string())); 308 } 309 Ok(()) 310 } 311 312 fn validate_strong_ref( 313 &self, 314 value: Option<&Value>, 315 path: &str, 316 ) -> Result<(), ValidationError> { 317 let obj = 318 value 319 .and_then(|v| v.as_object()) 320 .ok_or_else(|| ValidationError::InvalidField { 321 path: path.to_string(), 322 message: "Must be a strong reference object".to_string(), 323 })?; 324 if !obj.contains_key("uri") { 325 return Err(ValidationError::MissingField(format!("{}/uri", path))); 326 } 327 if !obj.contains_key("cid") { 328 return Err(ValidationError::MissingField(format!("{}/cid", path))); 329 } 330 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) 331 && !uri.starts_with("at://") { 332 return Err(ValidationError::InvalidField { 333 path: format!("{}/uri", path), 334 message: "URI must be an at:// URI".to_string(), 335 }); 336 } 337 Ok(()) 338 } 339} 340 341fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> { 342 if chrono::DateTime::parse_from_rfc3339(value).is_err() { 343 return Err(ValidationError::InvalidDatetime { 344 path: path.to_string(), 345 }); 346 } 347 Ok(()) 348} 349 350pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> { 351 if rkey.is_empty() { 352 return Err(ValidationError::InvalidRecord( 353 "Record key cannot be empty".to_string(), 354 )); 355 } 356 if rkey.len() > 512 { 357 return Err(ValidationError::InvalidRecord( 358 "Record key exceeds maximum length of 512".to_string(), 359 )); 360 } 361 if rkey == "." || rkey == ".." { 362 return Err(ValidationError::InvalidRecord( 363 "Record key cannot be '.' or '..'".to_string(), 364 )); 365 } 366 let valid_chars = rkey 367 .chars() 368 .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~'); 369 if !valid_chars { 370 return Err(ValidationError::InvalidRecord( 371 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')" 372 .to_string(), 373 )); 374 } 375 Ok(()) 376} 377 378pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> { 379 if collection.is_empty() { 380 return Err(ValidationError::InvalidRecord( 381 "Collection NSID cannot be empty".to_string(), 382 )); 383 } 384 let parts: Vec<&str> = collection.split('.').collect(); 385 if parts.len() < 3 { 386 return Err(ValidationError::InvalidRecord( 387 "Collection NSID must have at least 3 segments".to_string(), 388 )); 389 } 390 for part in &parts { 391 if part.is_empty() { 392 return Err(ValidationError::InvalidRecord( 393 "Collection NSID segments cannot be empty".to_string(), 394 )); 395 } 396 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { 397 return Err(ValidationError::InvalidRecord( 398 "Collection NSID segments must be alphanumeric or hyphens".to_string(), 399 )); 400 } 401 } 402 Ok(()) 403} 404 405#[cfg(test)] 406mod tests { 407 use super::*; 408 use serde_json::json; 409 410 #[test] 411 fn test_validate_post() { 412 let validator = RecordValidator::new(); 413 let valid_post = json!({ 414 "$type": "app.bsky.feed.post", 415 "text": "Hello, world!", 416 "createdAt": "2024-01-01T00:00:00.000Z" 417 }); 418 assert_eq!( 419 validator 420 .validate(&valid_post, "app.bsky.feed.post") 421 .unwrap(), 422 ValidationStatus::Valid 423 ); 424 } 425 426 #[test] 427 fn test_validate_post_missing_text() { 428 let validator = RecordValidator::new(); 429 let invalid_post = json!({ 430 "$type": "app.bsky.feed.post", 431 "createdAt": "2024-01-01T00:00:00.000Z" 432 }); 433 assert!( 434 validator 435 .validate(&invalid_post, "app.bsky.feed.post") 436 .is_err() 437 ); 438 } 439 440 #[test] 441 fn test_validate_type_mismatch() { 442 let validator = RecordValidator::new(); 443 let record = json!({ 444 "$type": "app.bsky.feed.like", 445 "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."}, 446 "createdAt": "2024-01-01T00:00:00.000Z" 447 }); 448 let result = validator.validate(&record, "app.bsky.feed.post"); 449 assert!(matches!(result, Err(ValidationError::TypeMismatch { .. }))); 450 } 451 452 #[test] 453 fn test_validate_unknown_type() { 454 let validator = RecordValidator::new(); 455 let record = json!({ 456 "$type": "com.example.custom", 457 "data": "test" 458 }); 459 assert_eq!( 460 validator.validate(&record, "com.example.custom").unwrap(), 461 ValidationStatus::Unknown 462 ); 463 } 464 465 #[test] 466 fn test_validate_unknown_type_strict() { 467 let validator = RecordValidator::new().require_lexicon(true); 468 let record = json!({ 469 "$type": "com.example.custom", 470 "data": "test" 471 }); 472 let result = validator.validate(&record, "com.example.custom"); 473 assert!(matches!(result, Err(ValidationError::UnknownType(_)))); 474 } 475 476 #[test] 477 fn test_validate_record_key() { 478 assert!(validate_record_key("valid-key_123").is_ok()); 479 assert!(validate_record_key("3k2n5j2").is_ok()); 480 assert!(validate_record_key(".").is_err()); 481 assert!(validate_record_key("..").is_err()); 482 assert!(validate_record_key("").is_err()); 483 assert!(validate_record_key("invalid/key").is_err()); 484 } 485 486 #[test] 487 fn test_validate_collection_nsid() { 488 assert!(validate_collection_nsid("app.bsky.feed.post").is_ok()); 489 assert!(validate_collection_nsid("com.atproto.repo.record").is_ok()); 490 assert!(validate_collection_nsid("invalid").is_err()); 491 assert!(validate_collection_nsid("a.b").is_err()); 492 assert!(validate_collection_nsid("").is_err()); 493 } 494}