this repo has no description
1use serde_json::Value; 2use thiserror::Error; 3 4#[derive(Debug, Error)] 5pub enum ValidationError { 6 #[error("No $type provided")] 7 MissingType, 8 #[error("Invalid $type: expected {expected}, got {actual}")] 9 TypeMismatch { expected: String, actual: String }, 10 #[error("Missing required field: {0}")] 11 MissingField(String), 12 #[error("Invalid field value at {path}: {message}")] 13 InvalidField { path: String, message: String }, 14 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")] 15 InvalidDatetime { path: String }, 16 #[error("Invalid record: {0}")] 17 InvalidRecord(String), 18 #[error("Unknown record type: {0}")] 19 UnknownType(String), 20} 21 22#[derive(Debug, Clone, Copy, PartialEq, Eq)] 23pub enum ValidationStatus { 24 Valid, 25 Unknown, 26 Invalid, 27} 28 29pub struct RecordValidator { 30 require_lexicon: bool, 31} 32 33impl Default for RecordValidator { 34 fn default() -> Self { 35 Self::new() 36 } 37} 38 39impl RecordValidator { 40 pub fn new() -> Self { 41 Self { 42 require_lexicon: false, 43 } 44 } 45 46 pub fn require_lexicon(mut self, require: bool) -> Self { 47 self.require_lexicon = require; 48 self 49 } 50 51 pub fn validate( 52 &self, 53 record: &Value, 54 collection: &str, 55 ) -> Result<ValidationStatus, ValidationError> { 56 let obj = record.as_object().ok_or_else(|| { 57 ValidationError::InvalidRecord("Record must be an object".to_string()) 58 })?; 59 let record_type = obj 60 .get("$type") 61 .and_then(|v| v.as_str()) 62 .ok_or(ValidationError::MissingType)?; 63 if record_type != collection { 64 return Err(ValidationError::TypeMismatch { 65 expected: collection.to_string(), 66 actual: record_type.to_string(), 67 }); 68 } 69 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) { 70 validate_datetime(created_at, "createdAt")?; 71 } 72 match record_type { 73 "app.bsky.feed.post" => self.validate_post(obj)?, 74 "app.bsky.actor.profile" => self.validate_profile(obj)?, 75 "app.bsky.feed.like" => self.validate_like(obj)?, 76 "app.bsky.feed.repost" => self.validate_repost(obj)?, 77 "app.bsky.graph.follow" => self.validate_follow(obj)?, 78 "app.bsky.graph.block" => self.validate_block(obj)?, 79 "app.bsky.graph.list" => self.validate_list(obj)?, 80 "app.bsky.graph.listitem" => self.validate_list_item(obj)?, 81 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?, 82 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?, 83 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?, 84 _ => { 85 if self.require_lexicon { 86 return Err(ValidationError::UnknownType(record_type.to_string())); 87 } 88 return Ok(ValidationStatus::Unknown); 89 } 90 } 91 Ok(ValidationStatus::Valid) 92 } 93 94 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 95 if !obj.contains_key("text") { 96 return Err(ValidationError::MissingField("text".to_string())); 97 } 98 if !obj.contains_key("createdAt") { 99 return Err(ValidationError::MissingField("createdAt".to_string())); 100 } 101 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) { 102 let grapheme_count = text.chars().count(); 103 if grapheme_count > 3000 { 104 return Err(ValidationError::InvalidField { 105 path: "text".to_string(), 106 message: format!( 107 "Text exceeds maximum length of 3000 characters (got {})", 108 grapheme_count 109 ), 110 }); 111 } 112 } 113 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) 114 && langs.len() > 3 115 { 116 return Err(ValidationError::InvalidField { 117 path: "langs".to_string(), 118 message: "Maximum 3 languages allowed".to_string(), 119 }); 120 } 121 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) { 122 if tags.len() > 8 { 123 return Err(ValidationError::InvalidField { 124 path: "tags".to_string(), 125 message: "Maximum 8 tags allowed".to_string(), 126 }); 127 } 128 for (i, tag) in tags.iter().enumerate() { 129 if let Some(tag_str) = tag.as_str() 130 && tag_str.len() > 640 131 { 132 return Err(ValidationError::InvalidField { 133 path: format!("tags/{}", i), 134 message: "Tag exceeds maximum length of 640 bytes".to_string(), 135 }); 136 } 137 } 138 } 139 Ok(()) 140 } 141 142 fn validate_profile( 143 &self, 144 obj: &serde_json::Map<String, Value>, 145 ) -> Result<(), ValidationError> { 146 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) { 147 let grapheme_count = display_name.chars().count(); 148 if grapheme_count > 640 { 149 return Err(ValidationError::InvalidField { 150 path: "displayName".to_string(), 151 message: format!( 152 "Display name exceeds maximum length of 640 characters (got {})", 153 grapheme_count 154 ), 155 }); 156 } 157 } 158 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) { 159 let grapheme_count = description.chars().count(); 160 if grapheme_count > 2560 { 161 return Err(ValidationError::InvalidField { 162 path: "description".to_string(), 163 message: format!( 164 "Description exceeds maximum length of 2560 characters (got {})", 165 grapheme_count 166 ), 167 }); 168 } 169 } 170 Ok(()) 171 } 172 173 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 174 if !obj.contains_key("subject") { 175 return Err(ValidationError::MissingField("subject".to_string())); 176 } 177 if !obj.contains_key("createdAt") { 178 return Err(ValidationError::MissingField("createdAt".to_string())); 179 } 180 self.validate_strong_ref(obj.get("subject"), "subject")?; 181 Ok(()) 182 } 183 184 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 185 if !obj.contains_key("subject") { 186 return Err(ValidationError::MissingField("subject".to_string())); 187 } 188 if !obj.contains_key("createdAt") { 189 return Err(ValidationError::MissingField("createdAt".to_string())); 190 } 191 self.validate_strong_ref(obj.get("subject"), "subject")?; 192 Ok(()) 193 } 194 195 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 196 if !obj.contains_key("subject") { 197 return Err(ValidationError::MissingField("subject".to_string())); 198 } 199 if !obj.contains_key("createdAt") { 200 return Err(ValidationError::MissingField("createdAt".to_string())); 201 } 202 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) 203 && !subject.starts_with("did:") 204 { 205 return Err(ValidationError::InvalidField { 206 path: "subject".to_string(), 207 message: "Subject must be a DID".to_string(), 208 }); 209 } 210 Ok(()) 211 } 212 213 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 214 if !obj.contains_key("subject") { 215 return Err(ValidationError::MissingField("subject".to_string())); 216 } 217 if !obj.contains_key("createdAt") { 218 return Err(ValidationError::MissingField("createdAt".to_string())); 219 } 220 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) 221 && !subject.starts_with("did:") 222 { 223 return Err(ValidationError::InvalidField { 224 path: "subject".to_string(), 225 message: "Subject must be a DID".to_string(), 226 }); 227 } 228 Ok(()) 229 } 230 231 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> { 232 if !obj.contains_key("name") { 233 return Err(ValidationError::MissingField("name".to_string())); 234 } 235 if !obj.contains_key("purpose") { 236 return Err(ValidationError::MissingField("purpose".to_string())); 237 } 238 if !obj.contains_key("createdAt") { 239 return Err(ValidationError::MissingField("createdAt".to_string())); 240 } 241 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) 242 && (name.is_empty() || name.len() > 64) 243 { 244 return Err(ValidationError::InvalidField { 245 path: "name".to_string(), 246 message: "Name must be 1-64 characters".to_string(), 247 }); 248 } 249 Ok(()) 250 } 251 252 fn validate_list_item( 253 &self, 254 obj: &serde_json::Map<String, Value>, 255 ) -> Result<(), ValidationError> { 256 if !obj.contains_key("subject") { 257 return Err(ValidationError::MissingField("subject".to_string())); 258 } 259 if !obj.contains_key("list") { 260 return Err(ValidationError::MissingField("list".to_string())); 261 } 262 if !obj.contains_key("createdAt") { 263 return Err(ValidationError::MissingField("createdAt".to_string())); 264 } 265 Ok(()) 266 } 267 268 fn validate_feed_generator( 269 &self, 270 obj: &serde_json::Map<String, Value>, 271 ) -> Result<(), ValidationError> { 272 if !obj.contains_key("did") { 273 return Err(ValidationError::MissingField("did".to_string())); 274 } 275 if !obj.contains_key("displayName") { 276 return Err(ValidationError::MissingField("displayName".to_string())); 277 } 278 if !obj.contains_key("createdAt") { 279 return Err(ValidationError::MissingField("createdAt".to_string())); 280 } 281 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) 282 && (display_name.is_empty() || display_name.len() > 240) 283 { 284 return Err(ValidationError::InvalidField { 285 path: "displayName".to_string(), 286 message: "displayName must be 1-240 characters".to_string(), 287 }); 288 } 289 Ok(()) 290 } 291 292 fn validate_threadgate( 293 &self, 294 obj: &serde_json::Map<String, Value>, 295 ) -> Result<(), ValidationError> { 296 if !obj.contains_key("post") { 297 return Err(ValidationError::MissingField("post".to_string())); 298 } 299 if !obj.contains_key("createdAt") { 300 return Err(ValidationError::MissingField("createdAt".to_string())); 301 } 302 Ok(()) 303 } 304 305 fn validate_labeler_service( 306 &self, 307 obj: &serde_json::Map<String, Value>, 308 ) -> Result<(), ValidationError> { 309 if !obj.contains_key("policies") { 310 return Err(ValidationError::MissingField("policies".to_string())); 311 } 312 if !obj.contains_key("createdAt") { 313 return Err(ValidationError::MissingField("createdAt".to_string())); 314 } 315 Ok(()) 316 } 317 318 fn validate_strong_ref( 319 &self, 320 value: Option<&Value>, 321 path: &str, 322 ) -> Result<(), ValidationError> { 323 let obj = 324 value 325 .and_then(|v| v.as_object()) 326 .ok_or_else(|| ValidationError::InvalidField { 327 path: path.to_string(), 328 message: "Must be a strong reference object".to_string(), 329 })?; 330 if !obj.contains_key("uri") { 331 return Err(ValidationError::MissingField(format!("{}/uri", path))); 332 } 333 if !obj.contains_key("cid") { 334 return Err(ValidationError::MissingField(format!("{}/cid", path))); 335 } 336 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) 337 && !uri.starts_with("at://") 338 { 339 return Err(ValidationError::InvalidField { 340 path: format!("{}/uri", path), 341 message: "URI must be an at:// URI".to_string(), 342 }); 343 } 344 Ok(()) 345 } 346} 347 348fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> { 349 if chrono::DateTime::parse_from_rfc3339(value).is_err() { 350 return Err(ValidationError::InvalidDatetime { 351 path: path.to_string(), 352 }); 353 } 354 Ok(()) 355} 356 357pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> { 358 if rkey.is_empty() { 359 return Err(ValidationError::InvalidRecord( 360 "Record key cannot be empty".to_string(), 361 )); 362 } 363 if rkey.len() > 512 { 364 return Err(ValidationError::InvalidRecord( 365 "Record key exceeds maximum length of 512".to_string(), 366 )); 367 } 368 if rkey == "." || rkey == ".." { 369 return Err(ValidationError::InvalidRecord( 370 "Record key cannot be '.' or '..'".to_string(), 371 )); 372 } 373 let valid_chars = rkey 374 .chars() 375 .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~'); 376 if !valid_chars { 377 return Err(ValidationError::InvalidRecord( 378 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')" 379 .to_string(), 380 )); 381 } 382 Ok(()) 383} 384 385pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> { 386 if collection.is_empty() { 387 return Err(ValidationError::InvalidRecord( 388 "Collection NSID cannot be empty".to_string(), 389 )); 390 } 391 let parts: Vec<&str> = collection.split('.').collect(); 392 if parts.len() < 3 { 393 return Err(ValidationError::InvalidRecord( 394 "Collection NSID must have at least 3 segments".to_string(), 395 )); 396 } 397 for part in &parts { 398 if part.is_empty() { 399 return Err(ValidationError::InvalidRecord( 400 "Collection NSID segments cannot be empty".to_string(), 401 )); 402 } 403 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { 404 return Err(ValidationError::InvalidRecord( 405 "Collection NSID segments must be alphanumeric or hyphens".to_string(), 406 )); 407 } 408 } 409 Ok(()) 410} 411 412#[cfg(test)] 413mod tests { 414 use super::*; 415 use serde_json::json; 416 417 #[test] 418 fn test_validate_post() { 419 let validator = RecordValidator::new(); 420 let valid_post = json!({ 421 "$type": "app.bsky.feed.post", 422 "text": "Hello, world!", 423 "createdAt": "2024-01-01T00:00:00.000Z" 424 }); 425 assert_eq!( 426 validator 427 .validate(&valid_post, "app.bsky.feed.post") 428 .unwrap(), 429 ValidationStatus::Valid 430 ); 431 } 432 433 #[test] 434 fn test_validate_post_missing_text() { 435 let validator = RecordValidator::new(); 436 let invalid_post = json!({ 437 "$type": "app.bsky.feed.post", 438 "createdAt": "2024-01-01T00:00:00.000Z" 439 }); 440 assert!( 441 validator 442 .validate(&invalid_post, "app.bsky.feed.post") 443 .is_err() 444 ); 445 } 446 447 #[test] 448 fn test_validate_type_mismatch() { 449 let validator = RecordValidator::new(); 450 let record = json!({ 451 "$type": "app.bsky.feed.like", 452 "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."}, 453 "createdAt": "2024-01-01T00:00:00.000Z" 454 }); 455 let result = validator.validate(&record, "app.bsky.feed.post"); 456 assert!(matches!(result, Err(ValidationError::TypeMismatch { .. }))); 457 } 458 459 #[test] 460 fn test_validate_unknown_type() { 461 let validator = RecordValidator::new(); 462 let record = json!({ 463 "$type": "com.example.custom", 464 "data": "test" 465 }); 466 assert_eq!( 467 validator.validate(&record, "com.example.custom").unwrap(), 468 ValidationStatus::Unknown 469 ); 470 } 471 472 #[test] 473 fn test_validate_unknown_type_strict() { 474 let validator = RecordValidator::new().require_lexicon(true); 475 let record = json!({ 476 "$type": "com.example.custom", 477 "data": "test" 478 }); 479 let result = validator.validate(&record, "com.example.custom"); 480 assert!(matches!(result, Err(ValidationError::UnknownType(_)))); 481 } 482 483 #[test] 484 fn test_validate_record_key() { 485 assert!(validate_record_key("valid-key_123").is_ok()); 486 assert!(validate_record_key("3k2n5j2").is_ok()); 487 assert!(validate_record_key(".").is_err()); 488 assert!(validate_record_key("..").is_err()); 489 assert!(validate_record_key("").is_err()); 490 assert!(validate_record_key("invalid/key").is_err()); 491 } 492 493 #[test] 494 fn test_validate_collection_nsid() { 495 assert!(validate_collection_nsid("app.bsky.feed.post").is_ok()); 496 assert!(validate_collection_nsid("com.atproto.repo.record").is_ok()); 497 assert!(validate_collection_nsid("invalid").is_err()); 498 assert!(validate_collection_nsid("a.b").is_err()); 499 assert!(validate_collection_nsid("").is_err()); 500 } 501}