A better Rust ATProto crate
1//! Runtime validation of Data values against lexicon schemas
2//!
3//! This module provides infrastructure for validating untyped `Data` values against
4//! lexicon schemas, enabling partial deserialization, debugging, and schema migration.
5
6use crate::lexicon::{LexArrayItem, LexObjectProperty};
7use crate::ref_utils::RefPath;
8use crate::schema::SchemaRegistry;
9use cid::Cid as IpldCid;
10use dashmap::DashMap;
11use jacquard_common::{smol_str, types::value::Data};
12use sha2::{Digest, Sha256};
13use smol_str::SmolStr;
14use std::{
15 fmt,
16 sync::{Arc, LazyLock},
17};
18
19/// Path to a value within a data structure
20///
21/// Tracks the location of values during validation for precise error reporting.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct ValidationPath {
24 segments: Vec<PathSegment>,
25}
26
27/// A segment in a validation path
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub enum PathSegment {
30 /// Object field access
31 Field(SmolStr),
32 /// Array index access
33 Index(usize),
34 /// Union variant discriminator
35 UnionVariant(SmolStr),
36}
37
38impl ValidationPath {
39 /// Create a new empty path
40 pub fn new() -> Self {
41 Self {
42 segments: Vec::new(),
43 }
44 }
45
46 /// Create a path with a single field segment
47 pub fn from_field(name: &str) -> Self {
48 let mut path = Self::new();
49 path.push_field(name);
50 path
51 }
52
53 /// Add a field segment to the path
54 pub fn push_field(&mut self, name: &str) {
55 self.segments.push(PathSegment::Field(name.into()));
56 }
57
58 /// Add an index segment to the path
59 pub fn push_index(&mut self, idx: usize) {
60 self.segments.push(PathSegment::Index(idx));
61 }
62
63 /// Add a union variant segment to the path
64 pub fn push_variant(&mut self, type_str: &str) {
65 self.segments
66 .push(PathSegment::UnionVariant(type_str.into()));
67 }
68
69 /// Remove the last segment from the path
70 pub fn pop(&mut self) {
71 self.segments.pop();
72 }
73
74 /// Get the depth of the path
75 pub fn depth(&self) -> usize {
76 self.segments.len()
77 }
78
79 /// Check if the path is empty
80 pub fn is_empty(&self) -> bool {
81 self.segments.is_empty()
82 }
83
84 pub fn segments(&self) -> &[PathSegment] {
85 &self.segments
86 }
87}
88
89impl Default for ValidationPath {
90 fn default() -> Self {
91 Self::new()
92 }
93}
94
95impl fmt::Display for ValidationPath {
96 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97 if self.segments.is_empty() {
98 return write!(f, "(root)");
99 }
100
101 for seg in &self.segments {
102 match seg {
103 PathSegment::Field(name) => write!(f, ".{}", name)?,
104 PathSegment::Index(idx) => write!(f, "[{}]", idx)?,
105 PathSegment::UnionVariant(t) => write!(f, "($type={})", t)?,
106 }
107 }
108 Ok(())
109 }
110}
111
112/// Structural validation errors
113///
114/// These errors indicate that the data structure doesn't match the schema's type expectations.
115#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
116#[non_exhaustive]
117pub enum StructuralError {
118 #[error("Type mismatch at {path}: expected {expected}, got {actual}")]
119 TypeMismatch {
120 path: ValidationPath,
121 expected: jacquard_common::types::DataModelType,
122 actual: jacquard_common::types::DataModelType,
123 },
124
125 #[error("Missing required field at {path}: '{field}'")]
126 MissingRequiredField {
127 path: ValidationPath,
128 field: SmolStr,
129 },
130
131 #[error("Missing union discriminator ($type) at {path}")]
132 MissingUnionDiscriminator { path: ValidationPath },
133
134 #[error("Union type mismatch at {path}: $type='{actual_type}' not in [{expected_refs}]")]
135 UnionNoMatch {
136 path: ValidationPath,
137 actual_type: SmolStr,
138 expected_refs: SmolStr,
139 },
140
141 #[error("Unresolved ref at {path}: '{ref_nsid}'")]
142 UnresolvedRef {
143 path: ValidationPath,
144 ref_nsid: SmolStr,
145 },
146
147 #[error("Reference cycle detected at {path}: '{ref_nsid}' (stack: {stack})")]
148 RefCycle {
149 path: ValidationPath,
150 ref_nsid: SmolStr,
151 stack: SmolStr,
152 },
153
154 #[error("Max validation depth exceeded at {path}: {max}")]
155 MaxDepthExceeded { path: ValidationPath, max: usize },
156}
157
158/// Constraint validation errors
159///
160/// These errors indicate that the data violates lexicon constraints like max_length,
161/// max_graphemes, ranges, etc. The structure is correct but values are out of bounds.
162#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
163#[non_exhaustive]
164pub enum ConstraintError {
165 #[error("{path} exceeds max length: {actual} > {max}")]
166 MaxLength {
167 path: ValidationPath,
168 max: usize,
169 actual: usize,
170 },
171
172 #[error("{path} exceeds max graphemes: {actual} > {max}")]
173 MaxGraphemes {
174 path: ValidationPath,
175 max: usize,
176 actual: usize,
177 },
178
179 #[error("{path} below min length: {actual} < {min}")]
180 MinLength {
181 path: ValidationPath,
182 min: usize,
183 actual: usize,
184 },
185
186 #[error("{path} below min graphemes: {actual} < {min}")]
187 MinGraphemes {
188 path: ValidationPath,
189 min: usize,
190 actual: usize,
191 },
192
193 #[error("{path} value {actual} exceeds maximum: {max}")]
194 Maximum {
195 path: ValidationPath,
196 max: i64,
197 actual: i64,
198 },
199
200 #[error("{path} value {actual} below minimum: {min}")]
201 Minimum {
202 path: ValidationPath,
203 min: i64,
204 actual: i64,
205 },
206}
207
208/// Unified validation error type
209#[derive(Debug, Clone, thiserror::Error)]
210#[non_exhaustive]
211pub enum ValidationError {
212 #[error(transparent)]
213 Structural(#[from] StructuralError),
214
215 #[error(transparent)]
216 Constraint(#[from] ConstraintError),
217}
218
219/// Cache key for validation results
220///
221/// Content-addressed by CID to enable efficient caching across identical data.
222#[derive(Debug, Clone, Hash, Eq, PartialEq)]
223struct ValidationCacheKey {
224 nsid: SmolStr,
225 def_name: SmolStr,
226 cid: IpldCid,
227}
228
229impl ValidationCacheKey {
230 /// Create cache key from schema info and data
231 fn from_data<T: crate::schema::LexiconSchema>(
232 data: &Data,
233 ) -> Result<Self, CidComputationError> {
234 let cid = compute_data_cid(data)?;
235 Ok(Self {
236 nsid: SmolStr::new_static(T::nsid()),
237 def_name: SmolStr::new_static(T::def_name()),
238 cid,
239 })
240 }
241}
242
243/// Errors that can occur when computing CIDs
244#[derive(Debug, thiserror::Error)]
245#[non_exhaustive]
246pub enum CidComputationError {
247 #[error("Failed to serialize data to DAG-CBOR: {0}")]
248 DagCborEncode(#[from] serde_ipld_dagcbor::EncodeError<std::collections::TryReserveError>),
249
250 #[error("Failed to create multihash: {0}")]
251 Multihash(#[from] multihash::Error),
252}
253
254/// Compute CID for Data value
255///
256/// Uses SHA-256 hash and DAG-CBOR codec for content addressing.
257fn compute_data_cid(data: &Data) -> Result<IpldCid, CidComputationError> {
258 // Serialize to DAG-CBOR
259 let dag_cbor = data.to_dag_cbor()?;
260
261 // Compute SHA-256 hash
262 let hash = Sha256::digest(&dag_cbor);
263
264 // Create multihash (code 0x12 = sha2-256)
265 let multihash = multihash::Multihash::wrap(0x12, &hash)?;
266
267 // Create CIDv1 with dag-cbor codec (0x71)
268 Ok(IpldCid::new_v1(0x71, multihash))
269}
270
271/// Trait for converting lexicon types to object properties
272///
273/// This enables type-safe conversion between array items and object properties
274/// for unified validation logic.
275trait IntoObjectProperty<'a> {
276 /// Convert this type to an equivalent object property
277 fn into_object_property(self) -> LexObjectProperty<'a>;
278}
279
280impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> {
281 fn into_object_property(self) -> LexObjectProperty<'a> {
282 match self {
283 LexArrayItem::String(s) => LexObjectProperty::String(s),
284 LexArrayItem::Integer(i) => LexObjectProperty::Integer(i),
285 LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b),
286 LexArrayItem::Object(o) => LexObjectProperty::Object(o),
287 LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u),
288 LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b),
289 LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c),
290 LexArrayItem::Blob(b) => LexObjectProperty::Blob(b),
291 LexArrayItem::Ref(r) => LexObjectProperty::Ref(r),
292 LexArrayItem::Union(u) => LexObjectProperty::Union(u),
293 }
294 }
295}
296
297/// Result of validating Data against a schema
298///
299/// Distinguishes between structural errors (type mismatches, missing fields) and
300/// constraint violations (max_length, ranges, etc.).
301#[derive(Debug, Clone)]
302pub enum ValidationResult {
303 /// Only structural validation was performed (or data was structurally invalid)
304 StructuralOnly { structural: Vec<StructuralError> },
305 /// Both structural and constraint validation were performed
306 Complete {
307 structural: Vec<StructuralError>,
308 constraints: Vec<ConstraintError>,
309 },
310}
311
312impl ValidationResult {
313 /// Check if validation passed (no structural or constraint errors)
314 pub fn is_valid(&self) -> bool {
315 match self {
316 ValidationResult::StructuralOnly { structural } => structural.is_empty(),
317 ValidationResult::Complete {
318 structural,
319 constraints,
320 } => structural.is_empty() && constraints.is_empty(),
321 }
322 }
323
324 /// Check if structurally valid (ignoring constraint checks)
325 pub fn is_structurally_valid(&self) -> bool {
326 match self {
327 ValidationResult::StructuralOnly { structural } => structural.is_empty(),
328 ValidationResult::Complete { structural, .. } => structural.is_empty(),
329 }
330 }
331
332 /// Get structural errors
333 pub fn structural_errors(&self) -> &[StructuralError] {
334 match self {
335 ValidationResult::StructuralOnly { structural } => structural,
336 ValidationResult::Complete { structural, .. } => structural,
337 }
338 }
339
340 /// Get constraint errors
341 pub fn constraint_errors(&self) -> &[ConstraintError] {
342 match self {
343 ValidationResult::StructuralOnly { .. } => &[],
344 ValidationResult::Complete { constraints, .. } => constraints,
345 }
346 }
347
348 /// Check if there are any constraint violations
349 pub fn has_constraint_violations(&self) -> bool {
350 !self.constraint_errors().is_empty()
351 }
352
353 /// Get all errors (structural and constraint)
354 pub fn all_errors(&self) -> impl Iterator<Item = ValidationError> + '_ {
355 self.structural_errors()
356 .iter()
357 .cloned()
358 .map(ValidationError::Structural)
359 .chain(
360 self.constraint_errors()
361 .iter()
362 .cloned()
363 .map(ValidationError::Constraint),
364 )
365 }
366}
367
368/// Schema validator with caching
369///
370/// Validates Data values against lexicon schemas, caching results by content hash.
371pub struct SchemaValidator {
372 registry: SchemaRegistry,
373 cache: DashMap<ValidationCacheKey, Arc<ValidationResult>>,
374}
375
376static VALIDATOR: LazyLock<SchemaValidator> = LazyLock::new(|| SchemaValidator {
377 registry: SchemaRegistry::from_inventory(),
378 cache: DashMap::new(),
379});
380
381impl SchemaValidator {
382 /// Get the global validator instance
383 pub fn global() -> &'static Self {
384 &VALIDATOR
385 }
386
387 /// Create a new validator with empty registry
388 pub fn new() -> Self {
389 Self {
390 registry: SchemaRegistry::new(),
391 cache: DashMap::new(),
392 }
393 }
394
395 pub fn from_registry(registry: SchemaRegistry) -> Self {
396 Self {
397 registry,
398 cache: DashMap::new(),
399 }
400 }
401
402 /// Validate data against a schema (structural and constraints)
403 ///
404 /// Performs both structural validation (types, required fields) and constraint
405 /// validation (max_length, ranges, etc.). Results are cached by content hash.
406 pub fn validate<T: crate::schema::LexiconSchema>(
407 &self,
408 data: &Data,
409 ) -> Result<ValidationResult, CidComputationError> {
410 // Compute cache key
411 let key = ValidationCacheKey::from_data::<T>(data)?;
412
413 // Check cache (clone Arc immediately to avoid holding ref)
414 if let Some(cached) = self.cache.get(&key).map(|r| Arc::clone(&r)) {
415 return Ok((*cached).clone());
416 }
417
418 // Perform validation
419 let result = self.validate_uncached::<T>(data);
420
421 // Cache result
422 self.cache.insert(key, Arc::new(result.clone()));
423
424 Ok(result)
425 }
426
427 /// Validate only the structural aspects of data against a schema
428 ///
429 /// Only checks types, required fields, and schema structure. Does not check
430 /// constraints like max_length, ranges, etc. This is faster when you only
431 /// care about type correctness.
432 pub fn validate_structural<T: crate::schema::LexiconSchema>(
433 &self,
434 data: &Data,
435 ) -> ValidationResult {
436 self.validate_structural_uncached::<T>(data)
437 }
438
439 /// Validate without caching (internal)
440 fn validate_uncached<T: crate::schema::LexiconSchema>(&self, data: &Data) -> ValidationResult {
441 let def = match self.registry.get_def(T::nsid(), T::def_name()) {
442 Some(d) => d,
443 None => {
444 // Schema not found - this is a structural error
445 return ValidationResult::StructuralOnly {
446 structural: vec![StructuralError::UnresolvedRef {
447 path: ValidationPath::new(),
448 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(),
449 }],
450 };
451 }
452 };
453
454 let mut path = ValidationPath::new();
455 let mut ctx = ValidationContext::new(T::nsid(), T::def_name());
456
457 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
458
459 // If structurally invalid, return structural errors only
460 if !structural_errors.is_empty() {
461 return ValidationResult::StructuralOnly {
462 structural: structural_errors,
463 };
464 }
465
466 // Structurally valid - compute constraints eagerly
467 let mut path = ValidationPath::new();
468 let constraint_errors = validate_constraints(
469 &mut path,
470 data,
471 T::nsid(),
472 T::def_name(),
473 Some(&Arc::new(self.registry.clone())),
474 );
475
476 ValidationResult::Complete {
477 structural: structural_errors,
478 constraints: constraint_errors,
479 }
480 }
481
482 /// Validate structural aspects only without caching (internal)
483 fn validate_structural_uncached<T: crate::schema::LexiconSchema>(
484 &self,
485 data: &Data,
486 ) -> ValidationResult {
487 let def = match self.registry.get_def(T::nsid(), T::def_name()) {
488 Some(d) => d,
489 None => {
490 // Schema not found - this is a structural error
491 return ValidationResult::StructuralOnly {
492 structural: vec![StructuralError::UnresolvedRef {
493 path: ValidationPath::new(),
494 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(),
495 }],
496 };
497 }
498 };
499
500 let mut path = ValidationPath::new();
501 let mut ctx = ValidationContext::new(T::nsid(), T::def_name());
502
503 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
504
505 ValidationResult::StructuralOnly {
506 structural: structural_errors,
507 }
508 }
509
510 pub fn validate_by_nsid_structural(&self, nsid: &str, data: &Data) -> ValidationResult {
511 let mut split = nsid.split('#');
512 let nsid = split.next().unwrap();
513 let def_name = split.next().unwrap_or("main");
514 let def = match self.registry.get_def(nsid, def_name) {
515 Some(d) => d,
516 None => {
517 // Schema not found - this is a structural error
518 return ValidationResult::StructuralOnly {
519 structural: vec![StructuralError::UnresolvedRef {
520 path: ValidationPath::new(),
521 ref_nsid: format!("{}#{}", nsid, def_name).into(),
522 }],
523 };
524 }
525 };
526
527 let mut path = ValidationPath::new();
528 let mut ctx = ValidationContext::new(nsid, def_name);
529
530 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
531
532 ValidationResult::StructuralOnly {
533 structural: structural_errors,
534 }
535 }
536
537 pub fn validate_by_nsid(&self, nsid: &str, data: &Data) -> ValidationResult {
538 let mut split = nsid.split('#');
539 let nsid = split.next().unwrap();
540 let def_name = split.next().unwrap_or("main");
541 let def = match self.registry.get_def(nsid, def_name) {
542 Some(d) => d,
543 None => {
544 // Schema not found - this is a structural error
545 return ValidationResult::StructuralOnly {
546 structural: vec![StructuralError::UnresolvedRef {
547 path: ValidationPath::new(),
548 ref_nsid: format!("{}#{}", nsid, def_name).into(),
549 }],
550 };
551 }
552 };
553
554 let mut path = ValidationPath::new();
555 let mut ctx = ValidationContext::new(nsid, def_name);
556
557 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
558
559 // If structurally invalid, return structural errors only
560 if !structural_errors.is_empty() {
561 return ValidationResult::StructuralOnly {
562 structural: structural_errors,
563 };
564 }
565
566 // Structurally valid - compute constraints eagerly
567 let mut path = ValidationPath::new();
568 let constraint_errors = validate_constraints(
569 &mut path,
570 data,
571 nsid,
572 def_name,
573 Some(&Arc::new(self.registry.clone())),
574 );
575
576 ValidationResult::Complete {
577 structural: structural_errors,
578 constraints: constraint_errors,
579 }
580 }
581
582 /// Get the schema registry
583 pub fn registry(&self) -> &SchemaRegistry {
584 &self.registry
585 }
586}
587
588impl Default for SchemaValidator {
589 fn default() -> Self {
590 Self::new()
591 }
592}
593
/// Mutable state carried through one structural validation run
///
/// Tracks which schema is currently being validated and which refs are being
/// followed, so relative refs can be resolved and cycles detected.
struct ValidationContext {
    /// NSID of the schema currently being validated; base for relative refs
    current_nsid: String,
    /// Definition name currently being validated
    current_def: String,
    /// Full refs currently being followed, outermost first
    ref_stack: Vec<String>,
    /// Path depth at which following a ref is refused
    max_depth: usize,
}
601
602impl ValidationContext {
603 fn new(nsid: &str, def_name: &str) -> Self {
604 Self {
605 current_nsid: nsid.to_string(),
606 current_def: def_name.to_string(),
607 ref_stack: Vec::new(),
608 max_depth: 32,
609 }
610 }
611}
612
613/// Validate data against a lexicon def
614fn validate_def(
615 path: &mut ValidationPath,
616 data: &Data,
617 def: &crate::lexicon::LexUserType,
618 registry: &SchemaRegistry,
619 ctx: &mut ValidationContext,
620) -> Vec<StructuralError> {
621 use crate::lexicon::LexUserType;
622 use jacquard_common::types::DataModelType;
623
624 match def {
625 LexUserType::Object(obj) => {
626 // Must be an object
627 let Data::Object(obj_data) = data else {
628 return vec![StructuralError::TypeMismatch {
629 path: path.clone(),
630 expected: DataModelType::Object,
631 actual: data.data_type(),
632 }];
633 };
634
635 let mut errors = Vec::new();
636
637 // Check required fields
638 if let Some(required) = &obj.required {
639 for field in required {
640 if !obj_data.get(field.as_ref()).is_some() {
641 errors.push(StructuralError::MissingRequiredField {
642 path: path.clone(),
643 field: field.clone(),
644 });
645 }
646 }
647 }
648
649 // Validate each property that's present
650 for (name, prop) in &obj.properties {
651 if let Some(field_data) = obj_data.get(name.as_ref()) {
652 path.push_field(name.as_ref());
653 errors.extend(validate_property(path, field_data, prop, registry, ctx));
654 path.pop();
655 }
656 }
657
658 errors
659 }
660 LexUserType::Record(rec) => {
661 // Records are objects with record-specific metadata
662 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record;
663
664 let Data::Object(obj_data) = data else {
665 return vec![StructuralError::TypeMismatch {
666 path: path.clone(),
667 expected: data.data_type(),
668 actual: DataModelType::Object,
669 }];
670 };
671
672 let mut errors = Vec::new();
673
674 // Check required fields
675 if let Some(required) = &obj.required {
676 for field in required {
677 if !obj_data.get(field.as_ref()).is_some() {
678 errors.push(StructuralError::MissingRequiredField {
679 path: path.clone(),
680 field: field.clone(),
681 });
682 }
683 }
684 }
685
686 // Validate each property that's present
687 for (name, prop) in &obj.properties {
688 if let Some(field_data) = obj_data.get(name.as_ref()) {
689 path.push_field(name.as_ref());
690 errors.extend(validate_property(path, field_data, prop, registry, ctx));
691 path.pop();
692 }
693 }
694
695 errors
696 }
697 // Token types are unit types, no validation needed beyond type checking
698 LexUserType::Token(_) => Vec::new(),
699 // XRPC types are endpoint definitions, not data types
700 LexUserType::XrpcQuery(_)
701 | LexUserType::XrpcProcedure(_)
702 | LexUserType::XrpcSubscription(_) => Vec::new(),
703 // Other types
704 _ => Vec::new(),
705 }
706}
707
708/// Validate data against a property schema
709fn validate_property(
710 path: &mut ValidationPath,
711 data: &Data,
712 prop: &crate::lexicon::LexObjectProperty,
713 registry: &SchemaRegistry,
714 ctx: &mut ValidationContext,
715) -> Vec<StructuralError> {
716 use crate::lexicon::LexObjectProperty;
717 use jacquard_common::types::DataModelType;
718
719 match prop {
720 LexObjectProperty::String(_) => {
721 // Accept any string type
722 if !matches!(data.data_type(), DataModelType::String(_)) {
723 vec![StructuralError::TypeMismatch {
724 path: path.clone(),
725 expected: DataModelType::String(
726 jacquard_common::types::LexiconStringType::String,
727 ),
728 actual: data.data_type(),
729 }]
730 } else {
731 Vec::new()
732 }
733 }
734
735 LexObjectProperty::Integer(_) => {
736 if !matches!(data.data_type(), DataModelType::Integer) {
737 vec![StructuralError::TypeMismatch {
738 path: path.clone(),
739 expected: DataModelType::Integer,
740 actual: data.data_type(),
741 }]
742 } else {
743 Vec::new()
744 }
745 }
746
747 LexObjectProperty::Boolean(_) => {
748 if !matches!(data.data_type(), DataModelType::Boolean) {
749 vec![StructuralError::TypeMismatch {
750 path: path.clone(),
751 expected: DataModelType::Boolean,
752 actual: data.data_type(),
753 }]
754 } else {
755 Vec::new()
756 }
757 }
758
759 LexObjectProperty::Object(obj) => {
760 let Data::Object(obj_data) = data else {
761 return vec![StructuralError::TypeMismatch {
762 path: path.clone(),
763 expected: DataModelType::Object,
764 actual: data.data_type(),
765 }];
766 };
767
768 let mut errors = Vec::new();
769
770 // Check required fields
771 if let Some(required) = &obj.required {
772 for field in required {
773 if !obj_data.get(field.as_ref()).is_some() {
774 errors.push(StructuralError::MissingRequiredField {
775 path: path.clone(),
776 field: field.clone(),
777 });
778 }
779 }
780 }
781
782 // Recursively validate each property
783 for (name, schema_prop) in &obj.properties {
784 if let Some(field_data) = obj_data.get(name.as_ref()) {
785 path.push_field(name.as_ref());
786 errors.extend(validate_property(
787 path,
788 field_data,
789 schema_prop,
790 registry,
791 ctx,
792 ));
793 path.pop();
794 }
795 }
796
797 errors
798 }
799
800 LexObjectProperty::Array(arr) => {
801 let Data::Array(array) = data else {
802 return vec![StructuralError::TypeMismatch {
803 path: path.clone(),
804 expected: DataModelType::Array,
805 actual: data.data_type(),
806 }];
807 };
808
809 let mut errors = Vec::new();
810 for (idx, item) in array.iter().enumerate() {
811 path.push_index(idx);
812 errors.extend(validate_array_item(path, item, &arr.items, registry, ctx));
813 path.pop();
814 }
815 errors
816 }
817
818 LexObjectProperty::Union(u) => {
819 let Data::Object(obj) = data else {
820 return vec![StructuralError::TypeMismatch {
821 path: path.clone(),
822 expected: DataModelType::Object,
823 actual: data.data_type(),
824 }];
825 };
826
827 // Get $type discriminator
828 let Some(type_str) = obj.type_discriminator() else {
829 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }];
830 };
831
832 // Reject empty $type
833 if type_str.is_empty() {
834 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }];
835 }
836
837 // Try to match against refs
838 for variant_ref in &u.refs {
839 let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid));
840 let variant_nsid = ref_path.nsid().to_string();
841 let variant_def = ref_path.def().to_string();
842 let full_variant = ref_path.full_ref();
843
844 // Match by full ref or just nsid
845 if type_str == full_variant || type_str == variant_nsid {
846 // Found match - validate against this variant
847 let Some(variant_def_type) = registry.get_def(&variant_nsid, &variant_def)
848 else {
849 return vec![StructuralError::UnresolvedRef {
850 path: path.clone(),
851 ref_nsid: full_variant.into(),
852 }];
853 };
854
855 path.push_variant(type_str);
856 let old_nsid = std::mem::replace(&mut ctx.current_nsid, variant_nsid);
857 let old_def = std::mem::replace(&mut ctx.current_def, variant_def);
858
859 let errors = validate_def(path, data, &variant_def_type, registry, ctx);
860
861 ctx.current_nsid = old_nsid;
862 ctx.current_def = old_def;
863 path.pop();
864
865 return errors;
866 }
867 }
868
869 // No match found
870 if u.closed.unwrap_or(false) {
871 // Closed union - this is an error
872 let expected_refs = u
873 .refs
874 .iter()
875 .map(|r| r.as_ref())
876 .collect::<Vec<_>>()
877 .join(", ");
878 vec![StructuralError::UnionNoMatch {
879 path: path.clone(),
880 actual_type: type_str.into(),
881 expected_refs: expected_refs.into(),
882 }]
883 } else {
884 // Open union - allow unknown variants
885 Vec::new()
886 }
887 }
888
889 LexObjectProperty::Ref(r) => {
890 // Depth check
891 if path.depth() >= ctx.max_depth {
892 return vec![StructuralError::MaxDepthExceeded {
893 path: path.clone(),
894 max: ctx.max_depth,
895 }];
896 }
897
898 // Normalize ref
899 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid));
900 let ref_nsid = ref_path.nsid().to_string();
901 let ref_def = ref_path.def().to_string();
902 let full_ref = ref_path.full_ref();
903
904 // Cycle detection
905 if ctx.ref_stack.contains(&full_ref) {
906 let stack = ctx.ref_stack.join(" -> ");
907 return vec![StructuralError::RefCycle {
908 path: path.clone(),
909 ref_nsid: full_ref.into(),
910 stack: stack.into(),
911 }];
912 }
913
914 // Look up ref
915 let Some(ref_def_type) = registry.get_def(&ref_nsid, &ref_def) else {
916 return vec![StructuralError::UnresolvedRef {
917 path: path.clone(),
918 ref_nsid: full_ref.into(),
919 }];
920 };
921
922 // Push, validate, pop
923 ctx.ref_stack.push(full_ref);
924 let old_nsid = std::mem::replace(&mut ctx.current_nsid, ref_nsid);
925 let old_def = std::mem::replace(&mut ctx.current_def, ref_def);
926
927 let errors = validate_def(path, data, &ref_def_type, registry, ctx);
928
929 ctx.current_nsid = old_nsid;
930 ctx.current_def = old_def;
931 ctx.ref_stack.pop();
932
933 errors
934 }
935
936 LexObjectProperty::Bytes(_) => {
937 if !matches!(data.data_type(), DataModelType::Bytes) {
938 vec![StructuralError::TypeMismatch {
939 path: path.clone(),
940 expected: DataModelType::Bytes,
941 actual: data.data_type(),
942 }]
943 } else {
944 Vec::new()
945 }
946 }
947
948 LexObjectProperty::CidLink(_) => {
949 if !matches!(data.data_type(), DataModelType::CidLink) {
950 vec![StructuralError::TypeMismatch {
951 path: path.clone(),
952 expected: DataModelType::CidLink,
953 actual: data.data_type(),
954 }]
955 } else {
956 Vec::new()
957 }
958 }
959
960 LexObjectProperty::Blob(_) => {
961 if !matches!(data.data_type(), DataModelType::Blob) {
962 vec![StructuralError::TypeMismatch {
963 path: path.clone(),
964 expected: DataModelType::Blob,
965 actual: data.data_type(),
966 }]
967 } else {
968 Vec::new()
969 }
970 }
971
972 LexObjectProperty::Unknown(_) => {
973 // Any type allowed
974 Vec::new()
975 }
976 }
977}
978
979/// Validate array item against array item schema
980fn validate_array_item(
981 path: &mut ValidationPath,
982 data: &Data,
983 item_schema: &LexArrayItem,
984 registry: &SchemaRegistry,
985 ctx: &mut ValidationContext,
986) -> Vec<StructuralError> {
987 validate_property(
988 path,
989 data,
990 &item_schema.clone().into_object_property(),
991 registry,
992 ctx,
993 )
994}
995
996// ============================================================================
997// CONSTRAINT VALIDATION
998// ============================================================================
999
1000/// Validate constraints on data against schema (entry point with optional registry)
1001fn validate_constraints(
1002 path: &mut ValidationPath,
1003 data: &Data,
1004 nsid: &str,
1005 def_name: &str,
1006 registry: Option<&Arc<SchemaRegistry>>,
1007) -> Vec<ConstraintError> {
1008 // Use provided registry or fall back to global inventory
1009 let fallback_registry;
1010 let registry_ref = match registry {
1011 Some(r) => r.as_ref(),
1012 None => {
1013 fallback_registry = SchemaRegistry::from_inventory();
1014 &fallback_registry
1015 }
1016 };
1017
1018 validate_constraints_impl(path, data, nsid, def_name, registry_ref)
1019}
1020
1021/// Internal implementation that takes materialized registry
1022fn validate_constraints_impl(
1023 path: &mut ValidationPath,
1024 data: &Data,
1025 nsid: &str,
1026 def_name: &str,
1027 registry: &SchemaRegistry,
1028) -> Vec<ConstraintError> {
1029 use crate::lexicon::LexUserType;
1030
1031 // Get schema def
1032 let Some(def) = registry.get_def(nsid, def_name) else {
1033 return Vec::new();
1034 };
1035
1036 match def {
1037 LexUserType::Object(obj) => {
1038 let Data::Object(obj_data) = data else {
1039 return Vec::new();
1040 };
1041
1042 let mut errors = Vec::new();
1043
1044 // Check constraints on each property
1045 for (name, prop) in &obj.properties {
1046 if let Some(field_data) = obj_data.get(name.as_ref()) {
1047 path.push_field(name.as_ref());
1048 errors.extend(check_property_constraints(
1049 path, field_data, prop, nsid, registry,
1050 ));
1051 path.pop();
1052 }
1053 }
1054
1055 errors
1056 }
1057 LexUserType::Record(rec) => {
1058 // Records are objects with record-specific metadata
1059 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record;
1060
1061 let Data::Object(obj_data) = data else {
1062 return Vec::new();
1063 };
1064
1065 let mut errors = Vec::new();
1066
1067 // Check constraints on each property
1068 for (name, prop) in &obj.properties {
1069 if let Some(field_data) = obj_data.get(name.as_ref()) {
1070 path.push_field(name.as_ref());
1071 errors.extend(check_property_constraints(
1072 path, field_data, prop, nsid, registry,
1073 ));
1074 path.pop();
1075 }
1076 }
1077
1078 errors
1079 }
1080 // Token types, XRPC types, and other types don't have constraints
1081 _ => Vec::new(),
1082 }
1083}
1084
1085/// Check constraints on a property
1086fn check_property_constraints(
1087 path: &mut ValidationPath,
1088 data: &Data,
1089 prop: &crate::lexicon::LexObjectProperty,
1090 current_nsid: &str,
1091 registry: &SchemaRegistry,
1092) -> Vec<ConstraintError> {
1093 use crate::lexicon::LexObjectProperty;
1094
1095 match prop {
1096 LexObjectProperty::String(s) => {
1097 if let Data::String(str_val) = data {
1098 check_string_constraints(path, str_val.as_str(), s)
1099 } else {
1100 Vec::new()
1101 }
1102 }
1103
1104 LexObjectProperty::Integer(i) => {
1105 if let Data::Integer(int_val) = data {
1106 check_integer_constraints(path, *int_val, i)
1107 } else {
1108 Vec::new()
1109 }
1110 }
1111
1112 LexObjectProperty::Array(arr) => {
1113 if let Data::Array(array) = data {
1114 let mut errors = check_array_constraints(path, array, arr);
1115
1116 // Also check constraints on array items
1117 for (idx, item) in array.iter().enumerate() {
1118 path.push_index(idx);
1119 errors.extend(check_array_item_constraints(
1120 path,
1121 item,
1122 &arr.items,
1123 current_nsid,
1124 registry,
1125 ));
1126 path.pop();
1127 }
1128
1129 errors
1130 } else {
1131 Vec::new()
1132 }
1133 }
1134
1135 LexObjectProperty::Object(obj) => {
1136 if let Data::Object(obj_data) = data {
1137 let mut errors = Vec::new();
1138
1139 // Recursively check nested object properties
1140 for (name, schema_prop) in &obj.properties {
1141 if let Some(field_data) = obj_data.get(name.as_ref()) {
1142 path.push_field(name.as_ref());
1143 errors.extend(check_property_constraints(
1144 path,
1145 field_data,
1146 schema_prop,
1147 current_nsid,
1148 registry,
1149 ));
1150 path.pop();
1151 }
1152 }
1153
1154 errors
1155 } else {
1156 Vec::new()
1157 }
1158 }
1159
1160 LexObjectProperty::Ref(r) => {
1161 // Follow ref and check constraints
1162 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid));
1163 let ref_nsid = ref_path.nsid();
1164 let ref_def = ref_path.def();
1165
1166 if registry.get_def(ref_nsid, ref_def).is_some() {
1167 validate_constraints_impl(path, data, ref_nsid, ref_def, registry)
1168 } else {
1169 Vec::new()
1170 }
1171 }
1172
1173 // Other property types don't have constraints
1174 _ => Vec::new(),
1175 }
1176}
1177
1178/// Check string constraints
1179fn check_string_constraints(
1180 path: &ValidationPath,
1181 value: &str,
1182 schema: &crate::lexicon::LexString,
1183) -> Vec<ConstraintError> {
1184 let mut errors = Vec::new();
1185
1186 // Check byte length constraints
1187 let byte_len = value.len();
1188
1189 if let Some(min) = schema.min_length {
1190 if byte_len < min as usize {
1191 errors.push(ConstraintError::MinLength {
1192 path: path.clone(),
1193 min: min as usize,
1194 actual: byte_len,
1195 });
1196 }
1197 }
1198
1199 if let Some(max) = schema.max_length {
1200 if byte_len > max as usize {
1201 errors.push(ConstraintError::MaxLength {
1202 path: path.clone(),
1203 max: max as usize,
1204 actual: byte_len,
1205 });
1206 }
1207 }
1208
1209 // Check grapheme count constraints
1210 if schema.min_graphemes.is_some() || schema.max_graphemes.is_some() {
1211 use unicode_segmentation::UnicodeSegmentation;
1212 let grapheme_count = value.graphemes(true).count();
1213
1214 if let Some(min) = schema.min_graphemes {
1215 if grapheme_count < min as usize {
1216 errors.push(ConstraintError::MinGraphemes {
1217 path: path.clone(),
1218 min: min as usize,
1219 actual: grapheme_count,
1220 });
1221 }
1222 }
1223
1224 if let Some(max) = schema.max_graphemes {
1225 if grapheme_count > max as usize {
1226 errors.push(ConstraintError::MaxGraphemes {
1227 path: path.clone(),
1228 max: max as usize,
1229 actual: grapheme_count,
1230 });
1231 }
1232 }
1233 }
1234
1235 errors
1236}
1237
1238/// Check integer constraints
1239fn check_integer_constraints(
1240 path: &ValidationPath,
1241 value: i64,
1242 schema: &crate::lexicon::LexInteger,
1243) -> Vec<ConstraintError> {
1244 let mut errors = Vec::new();
1245
1246 if let Some(min) = schema.minimum {
1247 if value < min {
1248 errors.push(ConstraintError::Minimum {
1249 path: path.clone(),
1250 min,
1251 actual: value,
1252 });
1253 }
1254 }
1255
1256 if let Some(max) = schema.maximum {
1257 if value > max {
1258 errors.push(ConstraintError::Maximum {
1259 path: path.clone(),
1260 max,
1261 actual: value,
1262 });
1263 }
1264 }
1265
1266 errors
1267}
1268
1269/// Check array length constraints
1270fn check_array_constraints(
1271 path: &ValidationPath,
1272 array: &jacquard_common::types::value::Array,
1273 schema: &crate::lexicon::LexArray,
1274) -> Vec<ConstraintError> {
1275 let mut errors = Vec::new();
1276 let len = array.len();
1277
1278 if let Some(min) = schema.min_length {
1279 if len < min as usize {
1280 errors.push(ConstraintError::MinLength {
1281 path: path.clone(),
1282 min: min as usize,
1283 actual: len,
1284 });
1285 }
1286 }
1287
1288 if let Some(max) = schema.max_length {
1289 if len > max as usize {
1290 errors.push(ConstraintError::MaxLength {
1291 path: path.clone(),
1292 max: max as usize,
1293 actual: len,
1294 });
1295 }
1296 }
1297
1298 errors
1299}
1300
1301/// Check constraints on array items
1302fn check_array_item_constraints(
1303 path: &mut ValidationPath,
1304 data: &Data,
1305 item_schema: &LexArrayItem,
1306 current_nsid: &str,
1307 registry: &SchemaRegistry,
1308) -> Vec<ConstraintError> {
1309 check_property_constraints(
1310 path,
1311 data,
1312 &item_schema.clone().into_object_property(),
1313 current_nsid,
1314 registry,
1315 )
1316}
1317
1318#[cfg(test)]
1319mod tests;