A better Rust ATProto crate
at main 1319 lines 42 kB view raw
1//! Runtime validation of Data values against lexicon schemas 2//! 3//! This module provides infrastructure for validating untyped `Data` values against 4//! lexicon schemas, enabling partial deserialization, debugging, and schema migration. 5 6use crate::lexicon::{LexArrayItem, LexObjectProperty}; 7use crate::ref_utils::RefPath; 8use crate::schema::SchemaRegistry; 9use cid::Cid as IpldCid; 10use dashmap::DashMap; 11use jacquard_common::{smol_str, types::value::Data}; 12use sha2::{Digest, Sha256}; 13use smol_str::SmolStr; 14use std::{ 15 fmt, 16 sync::{Arc, LazyLock}, 17}; 18 19/// Path to a value within a data structure 20/// 21/// Tracks the location of values during validation for precise error reporting. 22#[derive(Debug, Clone, PartialEq, Eq)] 23pub struct ValidationPath { 24 segments: Vec<PathSegment>, 25} 26 27/// A segment in a validation path 28#[derive(Debug, Clone, PartialEq, Eq)] 29pub enum PathSegment { 30 /// Object field access 31 Field(SmolStr), 32 /// Array index access 33 Index(usize), 34 /// Union variant discriminator 35 UnionVariant(SmolStr), 36} 37 38impl ValidationPath { 39 /// Create a new empty path 40 pub fn new() -> Self { 41 Self { 42 segments: Vec::new(), 43 } 44 } 45 46 /// Create a path with a single field segment 47 pub fn from_field(name: &str) -> Self { 48 let mut path = Self::new(); 49 path.push_field(name); 50 path 51 } 52 53 /// Add a field segment to the path 54 pub fn push_field(&mut self, name: &str) { 55 self.segments.push(PathSegment::Field(name.into())); 56 } 57 58 /// Add an index segment to the path 59 pub fn push_index(&mut self, idx: usize) { 60 self.segments.push(PathSegment::Index(idx)); 61 } 62 63 /// Add a union variant segment to the path 64 pub fn push_variant(&mut self, type_str: &str) { 65 self.segments 66 .push(PathSegment::UnionVariant(type_str.into())); 67 } 68 69 /// Remove the last segment from the path 70 pub fn pop(&mut self) { 71 self.segments.pop(); 72 } 73 74 /// Get the depth of the path 75 pub fn depth(&self) -> usize { 76 self.segments.len() 77 } 78 79 /// Check if the path is empty 80 pub fn is_empty(&self) -> bool { 81 self.segments.is_empty() 82 } 83 84 pub fn segments(&self) -> &[PathSegment] { 85 &self.segments 86 } 87} 88 89impl Default for ValidationPath { 90 fn default() -> Self { 91 Self::new() 92 } 93} 94 95impl fmt::Display for ValidationPath { 96 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 97 if self.segments.is_empty() { 98 return write!(f, "(root)"); 99 } 100 101 for seg in &self.segments { 102 match seg { 103 PathSegment::Field(name) => write!(f, ".{}", name)?, 104 PathSegment::Index(idx) => write!(f, "[{}]", idx)?, 105 PathSegment::UnionVariant(t) => write!(f, "($type={})", t)?, 106 } 107 } 108 Ok(()) 109 } 110} 111 112/// Structural validation errors 113/// 114/// These errors indicate that the data structure doesn't match the schema's type expectations. 115#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 116#[non_exhaustive] 117pub enum StructuralError { 118 #[error("Type mismatch at {path}: expected {expected}, got {actual}")] 119 TypeMismatch { 120 path: ValidationPath, 121 expected: jacquard_common::types::DataModelType, 122 actual: jacquard_common::types::DataModelType, 123 }, 124 125 #[error("Missing required field at {path}: '{field}'")] 126 MissingRequiredField { 127 path: ValidationPath, 128 field: SmolStr, 129 }, 130 131 #[error("Missing union discriminator ($type) at {path}")] 132 MissingUnionDiscriminator { path: ValidationPath }, 133 134 #[error("Union type mismatch at {path}: $type='{actual_type}' not in [{expected_refs}]")] 135 UnionNoMatch { 136 path: ValidationPath, 137 actual_type: SmolStr, 138 expected_refs: SmolStr, 139 }, 140 141 #[error("Unresolved ref at {path}: '{ref_nsid}'")] 142 UnresolvedRef { 143 path: ValidationPath, 144 ref_nsid: SmolStr, 145 }, 146 147 #[error("Reference cycle detected at {path}: '{ref_nsid}' (stack: {stack})")] 148 RefCycle { 149 path: ValidationPath, 150 ref_nsid: SmolStr, 151 stack: SmolStr, 152 }, 153 154 #[error("Max validation depth exceeded at {path}: {max}")] 155 MaxDepthExceeded { path: ValidationPath, max: usize }, 156} 157 158/// Constraint validation errors 159/// 160/// These errors indicate that the data violates lexicon constraints like max_length, 161/// max_graphemes, ranges, etc. The structure is correct but values are out of bounds. 162#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 163#[non_exhaustive] 164pub enum ConstraintError { 165 #[error("{path} exceeds max length: {actual} > {max}")] 166 MaxLength { 167 path: ValidationPath, 168 max: usize, 169 actual: usize, 170 }, 171 172 #[error("{path} exceeds max graphemes: {actual} > {max}")] 173 MaxGraphemes { 174 path: ValidationPath, 175 max: usize, 176 actual: usize, 177 }, 178 179 #[error("{path} below min length: {actual} < {min}")] 180 MinLength { 181 path: ValidationPath, 182 min: usize, 183 actual: usize, 184 }, 185 186 #[error("{path} below min graphemes: {actual} < {min}")] 187 MinGraphemes { 188 path: ValidationPath, 189 min: usize, 190 actual: usize, 191 }, 192 193 #[error("{path} value {actual} exceeds maximum: {max}")] 194 Maximum { 195 path: ValidationPath, 196 max: i64, 197 actual: i64, 198 }, 199 200 #[error("{path} value {actual} below minimum: {min}")] 201 Minimum { 202 path: ValidationPath, 203 min: i64, 204 actual: i64, 205 }, 206} 207 208/// Unified validation error type 209#[derive(Debug, Clone, thiserror::Error)] 210#[non_exhaustive] 211pub enum ValidationError { 212 #[error(transparent)] 213 Structural(#[from] StructuralError), 214 215 #[error(transparent)] 216 Constraint(#[from] ConstraintError), 217} 218 219/// Cache key for validation results 220/// 221/// Content-addressed by CID to enable efficient caching across identical data. 222#[derive(Debug, Clone, Hash, Eq, PartialEq)] 223struct ValidationCacheKey { 224 nsid: SmolStr, 225 def_name: SmolStr, 226 cid: IpldCid, 227} 228 229impl ValidationCacheKey { 230 /// Create cache key from schema info and data 231 fn from_data<T: crate::schema::LexiconSchema>( 232 data: &Data, 233 ) -> Result<Self, CidComputationError> { 234 let cid = compute_data_cid(data)?; 235 Ok(Self { 236 nsid: SmolStr::new_static(T::nsid()), 237 def_name: SmolStr::new_static(T::def_name()), 238 cid, 239 }) 240 } 241} 242 243/// Errors that can occur when computing CIDs 244#[derive(Debug, thiserror::Error)] 245#[non_exhaustive] 246pub enum CidComputationError { 247 #[error("Failed to serialize data to DAG-CBOR: {0}")] 248 DagCborEncode(#[from] serde_ipld_dagcbor::EncodeError<std::collections::TryReserveError>), 249 250 #[error("Failed to create multihash: {0}")] 251 Multihash(#[from] multihash::Error), 252} 253 254/// Compute CID for Data value 255/// 256/// Uses SHA-256 hash and DAG-CBOR codec for content addressing. 257fn compute_data_cid(data: &Data) -> Result<IpldCid, CidComputationError> { 258 // Serialize to DAG-CBOR 259 let dag_cbor = data.to_dag_cbor()?; 260 261 // Compute SHA-256 hash 262 let hash = Sha256::digest(&dag_cbor); 263 264 // Create multihash (code 0x12 = sha2-256) 265 let multihash = multihash::Multihash::wrap(0x12, &hash)?; 266 267 // Create CIDv1 with dag-cbor codec (0x71) 268 Ok(IpldCid::new_v1(0x71, multihash)) 269} 270 271/// Trait for converting lexicon types to object properties 272/// 273/// This enables type-safe conversion between array items and object properties 274/// for unified validation logic. 275trait IntoObjectProperty<'a> { 276 /// Convert this type to an equivalent object property 277 fn into_object_property(self) -> LexObjectProperty<'a>; 278} 279 280impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> { 281 fn into_object_property(self) -> LexObjectProperty<'a> { 282 match self { 283 LexArrayItem::String(s) => LexObjectProperty::String(s), 284 LexArrayItem::Integer(i) => LexObjectProperty::Integer(i), 285 LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b), 286 LexArrayItem::Object(o) => LexObjectProperty::Object(o), 287 LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u), 288 LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b), 289 LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c), 290 LexArrayItem::Blob(b) => LexObjectProperty::Blob(b), 291 LexArrayItem::Ref(r) => LexObjectProperty::Ref(r), 292 LexArrayItem::Union(u) => LexObjectProperty::Union(u), 293 } 294 } 295} 296 297/// Result of validating Data against a schema 298/// 299/// Distinguishes between structural errors (type mismatches, missing fields) and 300/// constraint violations (max_length, ranges, etc.). 301#[derive(Debug, Clone)] 302pub enum ValidationResult { 303 /// Only structural validation was performed (or data was structurally invalid) 304 StructuralOnly { structural: Vec<StructuralError> }, 305 /// Both structural and constraint validation were performed 306 Complete { 307 structural: Vec<StructuralError>, 308 constraints: Vec<ConstraintError>, 309 }, 310} 311 312impl ValidationResult { 313 /// Check if validation passed (no structural or constraint errors) 314 pub fn is_valid(&self) -> bool { 315 match self { 316 ValidationResult::StructuralOnly { structural } => structural.is_empty(), 317 ValidationResult::Complete { 318 structural, 319 constraints, 320 } => structural.is_empty() && constraints.is_empty(), 321 } 322 } 323 324 /// Check if structurally valid (ignoring constraint checks) 325 pub fn is_structurally_valid(&self) -> bool { 326 match self { 327 ValidationResult::StructuralOnly { structural } => structural.is_empty(), 328 ValidationResult::Complete { structural, .. } => structural.is_empty(), 329 } 330 } 331 332 /// Get structural errors 333 pub fn structural_errors(&self) -> &[StructuralError] { 334 match self { 335 ValidationResult::StructuralOnly { structural } => structural, 336 ValidationResult::Complete { structural, .. } => structural, 337 } 338 } 339 340 /// Get constraint errors 341 pub fn constraint_errors(&self) -> &[ConstraintError] { 342 match self { 343 ValidationResult::StructuralOnly { .. } => &[], 344 ValidationResult::Complete { constraints, .. } => constraints, 345 } 346 } 347 348 /// Check if there are any constraint violations 349 pub fn has_constraint_violations(&self) -> bool { 350 !self.constraint_errors().is_empty() 351 } 352 353 /// Get all errors (structural and constraint) 354 pub fn all_errors(&self) -> impl Iterator<Item = ValidationError> + '_ { 355 self.structural_errors() 356 .iter() 357 .cloned() 358 .map(ValidationError::Structural) 359 .chain( 360 self.constraint_errors() 361 .iter() 362 .cloned() 363 .map(ValidationError::Constraint), 364 ) 365 } 366} 367 368/// Schema validator with caching 369/// 370/// Validates Data values against lexicon schemas, caching results by content hash. 371pub struct SchemaValidator { 372 registry: SchemaRegistry, 373 cache: DashMap<ValidationCacheKey, Arc<ValidationResult>>, 374} 375 376static VALIDATOR: LazyLock<SchemaValidator> = LazyLock::new(|| SchemaValidator { 377 registry: SchemaRegistry::from_inventory(), 378 cache: DashMap::new(), 379}); 380 381impl SchemaValidator { 382 /// Get the global validator instance 383 pub fn global() -> &'static Self { 384 &VALIDATOR 385 } 386 387 /// Create a new validator with empty registry 388 pub fn new() -> Self { 389 Self { 390 registry: SchemaRegistry::new(), 391 cache: DashMap::new(), 392 } 393 } 394 395 pub fn from_registry(registry: SchemaRegistry) -> Self { 396 Self { 397 registry, 398 cache: DashMap::new(), 399 } 400 } 401 402 /// Validate data against a schema (structural and constraints) 403 /// 404 /// Performs both structural validation (types, required fields) and constraint 405 /// validation (max_length, ranges, etc.). Results are cached by content hash. 406 pub fn validate<T: crate::schema::LexiconSchema>( 407 &self, 408 data: &Data, 409 ) -> Result<ValidationResult, CidComputationError> { 410 // Compute cache key 411 let key = ValidationCacheKey::from_data::<T>(data)?; 412 413 // Check cache (clone Arc immediately to avoid holding ref) 414 if let Some(cached) = self.cache.get(&key).map(|r| Arc::clone(&r)) { 415 return Ok((*cached).clone()); 416 } 417 418 // Perform validation 419 let result = self.validate_uncached::<T>(data); 420 421 // Cache result 422 self.cache.insert(key, Arc::new(result.clone())); 423 424 Ok(result) 425 } 426 427 /// Validate only the structural aspects of data against a schema 428 /// 429 /// Only checks types, required fields, and schema structure. Does not check 430 /// constraints like max_length, ranges, etc. This is faster when you only 431 /// care about type correctness. 432 pub fn validate_structural<T: crate::schema::LexiconSchema>( 433 &self, 434 data: &Data, 435 ) -> ValidationResult { 436 self.validate_structural_uncached::<T>(data) 437 } 438 439 /// Validate without caching (internal) 440 fn validate_uncached<T: crate::schema::LexiconSchema>(&self, data: &Data) -> ValidationResult { 441 let def = match self.registry.get_def(T::nsid(), T::def_name()) { 442 Some(d) => d, 443 None => { 444 // Schema not found - this is a structural error 445 return ValidationResult::StructuralOnly { 446 structural: vec![StructuralError::UnresolvedRef { 447 path: ValidationPath::new(), 448 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 449 }], 450 }; 451 } 452 }; 453 454 let mut path = ValidationPath::new(); 455 let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 456 457 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 458 459 // If structurally invalid, return structural errors only 460 if !structural_errors.is_empty() { 461 return ValidationResult::StructuralOnly { 462 structural: structural_errors, 463 }; 464 } 465 466 // Structurally valid - compute constraints eagerly 467 let mut path = ValidationPath::new(); 468 let constraint_errors = validate_constraints( 469 &mut path, 470 data, 471 T::nsid(), 472 T::def_name(), 473 Some(&Arc::new(self.registry.clone())), 474 ); 475 476 ValidationResult::Complete { 477 structural: structural_errors, 478 constraints: constraint_errors, 479 } 480 } 481 482 /// Validate structural aspects only without caching (internal) 483 fn validate_structural_uncached<T: crate::schema::LexiconSchema>( 484 &self, 485 data: &Data, 486 ) -> ValidationResult { 487 let def = match self.registry.get_def(T::nsid(), T::def_name()) { 488 Some(d) => d, 489 None => { 490 // Schema not found - this is a structural error 491 return ValidationResult::StructuralOnly { 492 structural: vec![StructuralError::UnresolvedRef { 493 path: ValidationPath::new(), 494 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 495 }], 496 }; 497 } 498 }; 499 500 let mut path = ValidationPath::new(); 501 let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 502 503 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 504 505 ValidationResult::StructuralOnly { 506 structural: structural_errors, 507 } 508 } 509 510 pub fn validate_by_nsid_structural(&self, nsid: &str, data: &Data) -> ValidationResult { 511 let mut split = nsid.split('#'); 512 let nsid = split.next().unwrap(); 513 let def_name = split.next().unwrap_or("main"); 514 let def = match self.registry.get_def(nsid, def_name) { 515 Some(d) => d, 516 None => { 517 // Schema not found - this is a structural error 518 return ValidationResult::StructuralOnly { 519 structural: vec![StructuralError::UnresolvedRef { 520 path: ValidationPath::new(), 521 ref_nsid: format!("{}#{}", nsid, def_name).into(), 522 }], 523 }; 524 } 525 }; 526 527 let mut path = ValidationPath::new(); 528 let mut ctx = ValidationContext::new(nsid, def_name); 529 530 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 531 532 ValidationResult::StructuralOnly { 533 structural: structural_errors, 534 } 535 } 536 537 pub fn validate_by_nsid(&self, nsid: &str, data: &Data) -> ValidationResult { 538 let mut split = nsid.split('#'); 539 let nsid = split.next().unwrap(); 540 let def_name = split.next().unwrap_or("main"); 541 let def = match self.registry.get_def(nsid, def_name) { 542 Some(d) => d, 543 None => { 544 // Schema not found - this is a structural error 545 return ValidationResult::StructuralOnly { 546 structural: vec![StructuralError::UnresolvedRef { 547 path: ValidationPath::new(), 548 ref_nsid: format!("{}#{}", nsid, def_name).into(), 549 }], 550 }; 551 } 552 }; 553 554 let mut path = ValidationPath::new(); 555 let mut ctx = ValidationContext::new(nsid, def_name); 556 557 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 558 559 // If structurally invalid, return structural errors only 560 if !structural_errors.is_empty() { 561 return ValidationResult::StructuralOnly { 562 structural: structural_errors, 563 }; 564 } 565 566 // Structurally valid - compute constraints eagerly 567 let mut path = ValidationPath::new(); 568 let constraint_errors = validate_constraints( 569 &mut path, 570 data, 571 nsid, 572 def_name, 573 Some(&Arc::new(self.registry.clone())), 574 ); 575 576 ValidationResult::Complete { 577 structural: structural_errors, 578 constraints: constraint_errors, 579 } 580 } 581 582 /// Get the schema registry 583 pub fn registry(&self) -> &SchemaRegistry { 584 &self.registry 585 } 586} 587 588impl Default for SchemaValidator { 589 fn default() -> Self { 590 Self::new() 591 } 592} 593 594/// Validation context for tracking refs and preventing cycles 595struct ValidationContext { 596 current_nsid: String, 597 current_def: String, 598 ref_stack: Vec<String>, 599 max_depth: usize, 600} 601 602impl ValidationContext { 603 fn new(nsid: &str, def_name: &str) -> Self { 604 Self { 605 current_nsid: nsid.to_string(), 606 current_def: def_name.to_string(), 607 ref_stack: Vec::new(), 608 max_depth: 32, 609 } 610 } 611} 612 613/// Validate data against a lexicon def 614fn validate_def( 615 path: &mut ValidationPath, 616 data: &Data, 617 def: &crate::lexicon::LexUserType, 618 registry: &SchemaRegistry, 619 ctx: &mut ValidationContext, 620) -> Vec<StructuralError> { 621 use crate::lexicon::LexUserType; 622 use jacquard_common::types::DataModelType; 623 624 match def { 625 LexUserType::Object(obj) => { 626 // Must be an object 627 let Data::Object(obj_data) = data else { 628 return vec![StructuralError::TypeMismatch { 629 path: path.clone(), 630 expected: DataModelType::Object, 631 actual: data.data_type(), 632 }]; 633 }; 634 635 let mut errors = Vec::new(); 636 637 // Check required fields 638 if let Some(required) = &obj.required { 639 for field in required { 640 if !obj_data.get(field.as_ref()).is_some() { 641 errors.push(StructuralError::MissingRequiredField { 642 path: path.clone(), 643 field: field.clone(), 644 }); 645 } 646 } 647 } 648 649 // Validate each property that's present 650 for (name, prop) in &obj.properties { 651 if let Some(field_data) = obj_data.get(name.as_ref()) { 652 path.push_field(name.as_ref()); 653 errors.extend(validate_property(path, field_data, prop, registry, ctx)); 654 path.pop(); 655 } 656 } 657 658 errors 659 } 660 LexUserType::Record(rec) => { 661 // Records are objects with record-specific metadata 662 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record; 663 664 let Data::Object(obj_data) = data else { 665 return vec![StructuralError::TypeMismatch { 666 path: path.clone(), 667 expected: data.data_type(), 668 actual: DataModelType::Object, 669 }]; 670 }; 671 672 let mut errors = Vec::new(); 673 674 // Check required fields 675 if let Some(required) = &obj.required { 676 for field in required { 677 if !obj_data.get(field.as_ref()).is_some() { 678 errors.push(StructuralError::MissingRequiredField { 679 path: path.clone(), 680 field: field.clone(), 681 }); 682 } 683 } 684 } 685 686 // Validate each property that's present 687 for (name, prop) in &obj.properties { 688 if let Some(field_data) = obj_data.get(name.as_ref()) { 689 path.push_field(name.as_ref()); 690 errors.extend(validate_property(path, field_data, prop, registry, ctx)); 691 path.pop(); 692 } 693 } 694 695 errors 696 } 697 // Token types are unit types, no validation needed beyond type checking 698 LexUserType::Token(_) => Vec::new(), 699 // XRPC types are endpoint definitions, not data types 700 LexUserType::XrpcQuery(_) 701 | LexUserType::XrpcProcedure(_) 702 | LexUserType::XrpcSubscription(_) => Vec::new(), 703 // Other types 704 _ => Vec::new(), 705 } 706} 707 708/// Validate data against a property schema 709fn validate_property( 710 path: &mut ValidationPath, 711 data: &Data, 712 prop: &crate::lexicon::LexObjectProperty, 713 registry: &SchemaRegistry, 714 ctx: &mut ValidationContext, 715) -> Vec<StructuralError> { 716 use crate::lexicon::LexObjectProperty; 717 use jacquard_common::types::DataModelType; 718 719 match prop { 720 LexObjectProperty::String(_) => { 721 // Accept any string type 722 if !matches!(data.data_type(), DataModelType::String(_)) { 723 vec![StructuralError::TypeMismatch { 724 path: path.clone(), 725 expected: DataModelType::String( 726 jacquard_common::types::LexiconStringType::String, 727 ), 728 actual: data.data_type(), 729 }] 730 } else { 731 Vec::new() 732 } 733 } 734 735 LexObjectProperty::Integer(_) => { 736 if !matches!(data.data_type(), DataModelType::Integer) { 737 vec![StructuralError::TypeMismatch { 738 path: path.clone(), 739 expected: DataModelType::Integer, 740 actual: data.data_type(), 741 }] 742 } else { 743 Vec::new() 744 } 745 } 746 747 LexObjectProperty::Boolean(_) => { 748 if !matches!(data.data_type(), DataModelType::Boolean) { 749 vec![StructuralError::TypeMismatch { 750 path: path.clone(), 751 expected: DataModelType::Boolean, 752 actual: data.data_type(), 753 }] 754 } else { 755 Vec::new() 756 } 757 } 758 759 LexObjectProperty::Object(obj) => { 760 let Data::Object(obj_data) = data else { 761 return vec![StructuralError::TypeMismatch { 762 path: path.clone(), 763 expected: DataModelType::Object, 764 actual: data.data_type(), 765 }]; 766 }; 767 768 let mut errors = Vec::new(); 769 770 // Check required fields 771 if let Some(required) = &obj.required { 772 for field in required { 773 if !obj_data.get(field.as_ref()).is_some() { 774 errors.push(StructuralError::MissingRequiredField { 775 path: path.clone(), 776 field: field.clone(), 777 }); 778 } 779 } 780 } 781 782 // Recursively validate each property 783 for (name, schema_prop) in &obj.properties { 784 if let Some(field_data) = obj_data.get(name.as_ref()) { 785 path.push_field(name.as_ref()); 786 errors.extend(validate_property( 787 path, 788 field_data, 789 schema_prop, 790 registry, 791 ctx, 792 )); 793 path.pop(); 794 } 795 } 796 797 errors 798 } 799 800 LexObjectProperty::Array(arr) => { 801 let Data::Array(array) = data else { 802 return vec![StructuralError::TypeMismatch { 803 path: path.clone(), 804 expected: DataModelType::Array, 805 actual: data.data_type(), 806 }]; 807 }; 808 809 let mut errors = Vec::new(); 810 for (idx, item) in array.iter().enumerate() { 811 path.push_index(idx); 812 errors.extend(validate_array_item(path, item, &arr.items, registry, ctx)); 813 path.pop(); 814 } 815 errors 816 } 817 818 LexObjectProperty::Union(u) => { 819 let Data::Object(obj) = data else { 820 return vec![StructuralError::TypeMismatch { 821 path: path.clone(), 822 expected: DataModelType::Object, 823 actual: data.data_type(), 824 }]; 825 }; 826 827 // Get $type discriminator 828 let Some(type_str) = obj.type_discriminator() else { 829 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }]; 830 }; 831 832 // Reject empty $type 833 if type_str.is_empty() { 834 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }]; 835 } 836 837 // Try to match against refs 838 for variant_ref in &u.refs { 839 let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid)); 840 let variant_nsid = ref_path.nsid().to_string(); 841 let variant_def = ref_path.def().to_string(); 842 let full_variant = ref_path.full_ref(); 843 844 // Match by full ref or just nsid 845 if type_str == full_variant || type_str == variant_nsid { 846 // Found match - validate against this variant 847 let Some(variant_def_type) = registry.get_def(&variant_nsid, &variant_def) 848 else { 849 return vec![StructuralError::UnresolvedRef { 850 path: path.clone(), 851 ref_nsid: full_variant.into(), 852 }]; 853 }; 854 855 path.push_variant(type_str); 856 let old_nsid = std::mem::replace(&mut ctx.current_nsid, variant_nsid); 857 let old_def = std::mem::replace(&mut ctx.current_def, variant_def); 858 859 let errors = validate_def(path, data, &variant_def_type, registry, ctx); 860 861 ctx.current_nsid = old_nsid; 862 ctx.current_def = old_def; 863 path.pop(); 864 865 return errors; 866 } 867 } 868 869 // No match found 870 if u.closed.unwrap_or(false) { 871 // Closed union - this is an error 872 let expected_refs = u 873 .refs 874 .iter() 875 .map(|r| r.as_ref()) 876 .collect::<Vec<_>>() 877 .join(", "); 878 vec![StructuralError::UnionNoMatch { 879 path: path.clone(), 880 actual_type: type_str.into(), 881 expected_refs: expected_refs.into(), 882 }] 883 } else { 884 // Open union - allow unknown variants 885 Vec::new() 886 } 887 } 888 889 LexObjectProperty::Ref(r) => { 890 // Depth check 891 if path.depth() >= ctx.max_depth { 892 return vec![StructuralError::MaxDepthExceeded { 893 path: path.clone(), 894 max: ctx.max_depth, 895 }]; 896 } 897 898 // Normalize ref 899 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid)); 900 let ref_nsid = ref_path.nsid().to_string(); 901 let ref_def = ref_path.def().to_string(); 902 let full_ref = ref_path.full_ref(); 903 904 // Cycle detection 905 if ctx.ref_stack.contains(&full_ref) { 906 let stack = ctx.ref_stack.join(" -> "); 907 return vec![StructuralError::RefCycle { 908 path: path.clone(), 909 ref_nsid: full_ref.into(), 910 stack: stack.into(), 911 }]; 912 } 913 914 // Look up ref 915 let Some(ref_def_type) = registry.get_def(&ref_nsid, &ref_def) else { 916 return vec![StructuralError::UnresolvedRef { 917 path: path.clone(), 918 ref_nsid: full_ref.into(), 919 }]; 920 }; 921 922 // Push, validate, pop 923 ctx.ref_stack.push(full_ref); 924 let old_nsid = std::mem::replace(&mut ctx.current_nsid, ref_nsid); 925 let old_def = std::mem::replace(&mut ctx.current_def, ref_def); 926 927 let errors = validate_def(path, data, &ref_def_type, registry, ctx); 928 929 ctx.current_nsid = old_nsid; 930 ctx.current_def = old_def; 931 ctx.ref_stack.pop(); 932 933 errors 934 } 935 936 LexObjectProperty::Bytes(_) => { 937 if !matches!(data.data_type(), DataModelType::Bytes) { 938 vec![StructuralError::TypeMismatch { 939 path: path.clone(), 940 expected: DataModelType::Bytes, 941 actual: data.data_type(), 942 }] 943 } else { 944 Vec::new() 945 } 946 } 947 948 LexObjectProperty::CidLink(_) => { 949 if !matches!(data.data_type(), DataModelType::CidLink) { 950 vec![StructuralError::TypeMismatch { 951 path: path.clone(), 952 expected: DataModelType::CidLink, 953 actual: data.data_type(), 954 }] 955 } else { 956 Vec::new() 957 } 958 } 959 960 LexObjectProperty::Blob(_) => { 961 if !matches!(data.data_type(), DataModelType::Blob) { 962 vec![StructuralError::TypeMismatch { 963 path: path.clone(), 964 expected: DataModelType::Blob, 965 actual: data.data_type(), 966 }] 967 } else { 968 Vec::new() 969 } 970 } 971 972 LexObjectProperty::Unknown(_) => { 973 // Any type allowed 974 Vec::new() 975 } 976 } 977} 978 979/// Validate array item against array item schema 980fn validate_array_item( 981 path: &mut ValidationPath, 982 data: &Data, 983 item_schema: &LexArrayItem, 984 registry: &SchemaRegistry, 985 ctx: &mut ValidationContext, 986) -> Vec<StructuralError> { 987 validate_property( 988 path, 989 data, 990 &item_schema.clone().into_object_property(), 991 registry, 992 ctx, 993 ) 994} 995 996// ============================================================================ 997// CONSTRAINT VALIDATION 998// ============================================================================ 999 1000/// Validate constraints on data against schema (entry point with optional registry) 1001fn validate_constraints( 1002 path: &mut ValidationPath, 1003 data: &Data, 1004 nsid: &str, 1005 def_name: &str, 1006 registry: Option<&Arc<SchemaRegistry>>, 1007) -> Vec<ConstraintError> { 1008 // Use provided registry or fall back to global inventory 1009 let fallback_registry; 1010 let registry_ref = match registry { 1011 Some(r) => r.as_ref(), 1012 None => { 1013 fallback_registry = SchemaRegistry::from_inventory(); 1014 &fallback_registry 1015 } 1016 }; 1017 1018 validate_constraints_impl(path, data, nsid, def_name, registry_ref) 1019} 1020 1021/// Internal implementation that takes materialized registry 1022fn validate_constraints_impl( 1023 path: &mut ValidationPath, 1024 data: &Data, 1025 nsid: &str, 1026 def_name: &str, 1027 registry: &SchemaRegistry, 1028) -> Vec<ConstraintError> { 1029 use crate::lexicon::LexUserType; 1030 1031 // Get schema def 1032 let Some(def) = registry.get_def(nsid, def_name) else { 1033 return Vec::new(); 1034 }; 1035 1036 match def { 1037 LexUserType::Object(obj) => { 1038 let Data::Object(obj_data) = data else { 1039 return Vec::new(); 1040 }; 1041 1042 let mut errors = Vec::new(); 1043 1044 // Check constraints on each property 1045 for (name, prop) in &obj.properties { 1046 if let Some(field_data) = obj_data.get(name.as_ref()) { 1047 path.push_field(name.as_ref()); 1048 errors.extend(check_property_constraints( 1049 path, field_data, prop, nsid, registry, 1050 )); 1051 path.pop(); 1052 } 1053 } 1054 1055 errors 1056 } 1057 LexUserType::Record(rec) => { 1058 // Records are objects with record-specific metadata 1059 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record; 1060 1061 let Data::Object(obj_data) = data else { 1062 return Vec::new(); 1063 }; 1064 1065 let mut errors = Vec::new(); 1066 1067 // Check constraints on each property 1068 for (name, prop) in &obj.properties { 1069 if let Some(field_data) = obj_data.get(name.as_ref()) { 1070 path.push_field(name.as_ref()); 1071 errors.extend(check_property_constraints( 1072 path, field_data, prop, nsid, registry, 1073 )); 1074 path.pop(); 1075 } 1076 } 1077 1078 errors 1079 } 1080 // Token types, XRPC types, and other types don't have constraints 1081 _ => Vec::new(), 1082 } 1083} 1084 1085/// Check constraints on a property 1086fn check_property_constraints( 1087 path: &mut ValidationPath, 1088 data: &Data, 1089 prop: &crate::lexicon::LexObjectProperty, 1090 current_nsid: &str, 1091 registry: &SchemaRegistry, 1092) -> Vec<ConstraintError> { 1093 use crate::lexicon::LexObjectProperty; 1094 1095 match prop { 1096 LexObjectProperty::String(s) => { 1097 if let Data::String(str_val) = data { 1098 check_string_constraints(path, str_val.as_str(), s) 1099 } else { 1100 Vec::new() 1101 } 1102 } 1103 1104 LexObjectProperty::Integer(i) => { 1105 if let Data::Integer(int_val) = data { 1106 check_integer_constraints(path, *int_val, i) 1107 } else { 1108 Vec::new() 1109 } 1110 } 1111 1112 LexObjectProperty::Array(arr) => { 1113 if let Data::Array(array) = data { 1114 let mut errors = check_array_constraints(path, array, arr); 1115 1116 // Also check constraints on array items 1117 for (idx, item) in array.iter().enumerate() { 1118 path.push_index(idx); 1119 errors.extend(check_array_item_constraints( 1120 path, 1121 item, 1122 &arr.items, 1123 current_nsid, 1124 registry, 1125 )); 1126 path.pop(); 1127 } 1128 1129 errors 1130 } else { 1131 Vec::new() 1132 } 1133 } 1134 1135 LexObjectProperty::Object(obj) => { 1136 if let Data::Object(obj_data) = data { 1137 let mut errors = Vec::new(); 1138 1139 // Recursively check nested object properties 1140 for (name, schema_prop) in &obj.properties { 1141 if let Some(field_data) = obj_data.get(name.as_ref()) { 1142 path.push_field(name.as_ref()); 1143 errors.extend(check_property_constraints( 1144 path, 1145 field_data, 1146 schema_prop, 1147 current_nsid, 1148 registry, 1149 )); 1150 path.pop(); 1151 } 1152 } 1153 1154 errors 1155 } else { 1156 Vec::new() 1157 } 1158 } 1159 1160 LexObjectProperty::Ref(r) => { 1161 // Follow ref and check constraints 1162 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid)); 1163 let ref_nsid = ref_path.nsid(); 1164 let ref_def = ref_path.def(); 1165 1166 if registry.get_def(ref_nsid, ref_def).is_some() { 1167 validate_constraints_impl(path, data, ref_nsid, ref_def, registry) 1168 } else { 1169 Vec::new() 1170 } 1171 } 1172 1173 // Other property types don't have constraints 1174 _ => Vec::new(), 1175 } 1176} 1177 1178/// Check string constraints 1179fn check_string_constraints( 1180 path: &ValidationPath, 1181 value: &str, 1182 schema: &crate::lexicon::LexString, 1183) -> Vec<ConstraintError> { 1184 let mut errors = Vec::new(); 1185 1186 // Check byte length constraints 1187 let byte_len = value.len(); 1188 1189 if let Some(min) = schema.min_length { 1190 if byte_len < min as usize { 1191 errors.push(ConstraintError::MinLength { 1192 path: path.clone(), 1193 min: min as usize, 1194 actual: byte_len, 1195 }); 1196 } 1197 } 1198 1199 if let Some(max) = schema.max_length { 1200 if byte_len > max as usize { 1201 errors.push(ConstraintError::MaxLength { 1202 path: path.clone(), 1203 max: max as usize, 1204 actual: byte_len, 1205 }); 1206 } 1207 } 1208 1209 // Check grapheme count constraints 1210 if schema.min_graphemes.is_some() || schema.max_graphemes.is_some() { 1211 use unicode_segmentation::UnicodeSegmentation; 1212 let grapheme_count = value.graphemes(true).count(); 1213 1214 if let Some(min) = schema.min_graphemes { 1215 if grapheme_count < min as usize { 1216 errors.push(ConstraintError::MinGraphemes { 1217 path: path.clone(), 1218 min: min as usize, 1219 actual: grapheme_count, 1220 }); 1221 } 1222 } 1223 1224 if let Some(max) = schema.max_graphemes { 1225 if grapheme_count > max as usize { 1226 errors.push(ConstraintError::MaxGraphemes { 1227 path: path.clone(), 1228 max: max as usize, 1229 actual: grapheme_count, 1230 }); 1231 } 1232 } 1233 } 1234 1235 errors 1236} 1237 1238/// Check integer constraints 1239fn check_integer_constraints( 1240 path: &ValidationPath, 1241 value: i64, 1242 schema: &crate::lexicon::LexInteger, 1243) -> Vec<ConstraintError> { 1244 let mut errors = Vec::new(); 1245 1246 if let Some(min) = schema.minimum { 1247 if value < min { 1248 errors.push(ConstraintError::Minimum { 1249 path: path.clone(), 1250 min, 1251 actual: value, 1252 }); 1253 } 1254 } 1255 1256 if let Some(max) = schema.maximum { 1257 if value > max { 1258 errors.push(ConstraintError::Maximum { 1259 path: path.clone(), 1260 max, 1261 actual: value, 1262 }); 1263 } 1264 } 1265 1266 errors 1267} 1268 1269/// Check array length constraints 1270fn check_array_constraints( 1271 path: &ValidationPath, 1272 array: &jacquard_common::types::value::Array, 1273 schema: &crate::lexicon::LexArray, 1274) -> Vec<ConstraintError> { 1275 let mut errors = Vec::new(); 1276 let len = array.len(); 1277 1278 if let Some(min) = schema.min_length { 1279 if len < min as usize { 1280 errors.push(ConstraintError::MinLength { 1281 path: path.clone(), 1282 min: min as usize, 1283 actual: len, 1284 }); 1285 } 1286 } 1287 1288 if let Some(max) = schema.max_length { 1289 if len > max as usize { 1290 errors.push(ConstraintError::MaxLength { 1291 path: path.clone(), 1292 max: max as usize, 1293 actual: len, 1294 }); 1295 } 1296 } 1297 1298 errors 1299} 1300 1301/// Check constraints on array items 1302fn check_array_item_constraints( 1303 path: &mut ValidationPath, 1304 data: &Data, 1305 item_schema: &LexArrayItem, 1306 current_nsid: &str, 1307 registry: &SchemaRegistry, 1308) -> Vec<ConstraintError> { 1309 check_property_constraints( 1310 path, 1311 data, 1312 &item_schema.clone().into_object_property(), 1313 current_nsid, 1314 registry, 1315 ) 1316} 1317 1318#[cfg(test)] 1319mod tests;