A better Rust ATProto crate

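Further cleanup: replace the catch-all `CodegenError::Other` with dedicated `TokenParseError` / `PathParseError` variants, and simplify validation by turning `ValidationResult` into an enum whose constraint errors are computed eagerly (plus a new structural-only `validate_structural` entry point).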

Orual 648de58f cc815abb

+183 -127
+6 -14
crates/jacquard-lexicon/src/codegen/output.rs
··· 173 173 174 174 // Create parent directories 175 175 if let Some(parent) = full_path.parent() { 176 - std::fs::create_dir_all(parent).map_err(|e| CodegenError::Other { 177 - message: format!("Failed to create directory {:?}: {}", parent, e), 178 - source: None, 179 - })?; 176 + std::fs::create_dir_all(parent)?; 180 177 } 181 178 182 179 // Format code 183 - let file: syn::File = syn::parse2(tokens.clone()).map_err(|e| CodegenError::Other { 184 - message: format!( 185 - "Failed to parse tokens for {:?}: {}\nTokens: {}", 186 - path, e, tokens 187 - ), 188 - source: None, 180 + let file: syn::File = syn::parse2(tokens.clone()).map_err(|e| CodegenError::TokenParseError { 181 + path: path.clone(), 182 + source: e, 183 + tokens: tokens.to_string(), 189 184 })?; 190 185 let mut formatted = prettyplease::unparse(&file); 191 186 ··· 224 219 formatted = format!("{}{}", header, formatted); 225 220 226 221 // Write file 227 - std::fs::write(&full_path, formatted).map_err(|e| CodegenError::Other { 228 - message: format!("Failed to write file {:?}: {}", full_path, e), 229 - source: None, 230 - })?; 222 + std::fs::write(&full_path, formatted)?; 231 223 } 232 224 233 225 Ok(())
+3 -3
crates/jacquard-lexicon/src/codegen/types.rs
··· 244 244 join_path_parts(&[&self.root_module, &module_path, &file_module, &type_name]) 245 245 }; 246 246 247 - let path: syn::Path = syn::parse_str(&path_str).map_err(|e| CodegenError::Other { 248 - message: format!("Failed to parse path: {} {}", path_str, e), 249 - source: None, 247 + let path: syn::Path = syn::parse_str(&path_str).map_err(|e| CodegenError::PathParseError { 248 + path_str: path_str.clone(), 249 + source: e, 250 250 })?; 251 251 252 252 // Only add lifetime if the target type needs it
+16 -7
crates/jacquard-lexicon/src/error.rs
··· 104 104 source: syn::Error, 105 105 }, 106 106 107 - /// Generic error with context 108 - #[error("{message}")] 109 - #[diagnostic(code(lexicon::error))] 110 - Other { 111 - message: String, 112 - /// Optional source error 107 + /// Failed to parse generated tokens back into syn AST 108 + #[error("Failed to parse generated code for {path:?}")] 109 + #[diagnostic(code(lexicon::token_parse_error))] 110 + TokenParseError { 111 + path: PathBuf, 112 + #[source] 113 + source: syn::Error, 114 + tokens: String, 115 + }, 116 + 117 + /// Failed to parse module path string 118 + #[error("Failed to parse module path: {path_str}")] 119 + #[diagnostic(code(lexicon::path_parse_error))] 120 + PathParseError { 121 + path_str: String, 113 122 #[source] 114 - source: Option<Box<dyn std::error::Error + Send + Sync>>, 123 + source: syn::Error, 115 124 }, 116 125 } 117 126
+102 -103
crates/jacquard-lexicon/src/validation.rs
··· 8 8 use crate::schema::SchemaRegistry; 9 9 use cid::Cid as IpldCid; 10 10 use dashmap::DashMap; 11 - use jacquard_common::{ 12 - IntoStatic, 13 - smol_str, 14 - types::value::Data, 15 - }; 11 + use jacquard_common::{smol_str, types::value::Data}; 16 12 use sha2::{Digest, Sha256}; 17 13 use smol_str::SmolStr; 18 - use std::{ 19 - fmt, 20 - sync::{Arc, LazyLock, OnceLock}, 21 - }; 14 + use std::{fmt, sync::{Arc, LazyLock}}; 22 15 23 16 /// Path to a value within a data structure 24 17 /// ··· 293 286 /// Result of validating Data against a schema 294 287 /// 295 288 /// Distinguishes between structural errors (type mismatches, missing fields) and 296 - /// constraint violations (max_length, ranges, etc.). Constraint validation is lazy. 289 + /// constraint violations (max_length, ranges, etc.). 297 290 #[derive(Debug, Clone)] 298 - pub struct ValidationResult { 299 - /// Structural errors (computed immediately) 300 - structural: Vec<StructuralError>, 301 - 302 - /// Constraint errors (computed on first access) 303 - constraints: OnceLock<Vec<ConstraintError>>, 304 - 305 - /// Context for lazy constraint validation 306 - data: Option<Arc<Data<'static>>>, 307 - schema_ref: Option<(SmolStr, SmolStr)>, // (nsid, def_name) 308 - registry: Option<Arc<SchemaRegistry>>, 291 + pub enum ValidationResult { 292 + /// Only structural validation was performed (or data was structurally invalid) 293 + StructuralOnly { 294 + structural: Vec<StructuralError>, 295 + }, 296 + /// Both structural and constraint validation were performed 297 + Complete { 298 + structural: Vec<StructuralError>, 299 + constraints: Vec<ConstraintError>, 300 + }, 309 301 } 310 302 311 303 impl ValidationResult { 312 - /// Create a validation result with no errors 313 - pub fn valid() -> Self { 314 - Self { 315 - structural: Vec::new(), 316 - constraints: OnceLock::new(), 317 - data: None, 318 - schema_ref: None, 319 - registry: None, 320 - } 321 - } 322 - 323 - /// Create a validation result with 
structural errors 324 - pub fn with_structural_errors(errors: Vec<StructuralError>) -> Self { 325 - Self { 326 - structural: errors, 327 - constraints: OnceLock::new(), 328 - data: None, 329 - schema_ref: None, 330 - registry: None, 331 - } 332 - } 333 - 334 - /// Create a validation result with context for lazy constraint validation 335 - pub fn with_context( 336 - structural: Vec<StructuralError>, 337 - data: Arc<Data<'static>>, 338 - nsid: SmolStr, 339 - def_name: SmolStr, 340 - registry: Arc<SchemaRegistry>, 341 - ) -> Self { 342 - Self { 343 - structural, 344 - constraints: OnceLock::new(), 345 - data: Some(data), 346 - schema_ref: Some((nsid, def_name)), 347 - registry: Some(registry), 348 - } 349 - } 350 - 351 304 /// Check if validation passed (no structural or constraint errors) 352 305 pub fn is_valid(&self) -> bool { 353 - self.structural.is_empty() && self.constraint_errors().is_empty() 306 + match self { 307 + ValidationResult::StructuralOnly { structural } => structural.is_empty(), 308 + ValidationResult::Complete { 309 + structural, 310 + constraints, 311 + } => structural.is_empty() && constraints.is_empty(), 312 + } 354 313 } 355 314 356 315 /// Check if structurally valid (ignoring constraint checks) 357 316 pub fn is_structurally_valid(&self) -> bool { 358 - self.structural.is_empty() 317 + match self { 318 + ValidationResult::StructuralOnly { structural } => structural.is_empty(), 319 + ValidationResult::Complete { structural, .. } => structural.is_empty(), 320 + } 359 321 } 360 322 361 323 /// Get structural errors 362 324 pub fn structural_errors(&self) -> &[StructuralError] { 363 - &self.structural 325 + match self { 326 + ValidationResult::StructuralOnly { structural } => structural, 327 + ValidationResult::Complete { structural, .. 
} => structural, 328 + } 364 329 } 365 330 366 - /// Get constraint errors (computed lazily on first access) 331 + /// Get constraint errors 367 332 pub fn constraint_errors(&self) -> &[ConstraintError] { 368 - self.constraints.get_or_init(|| { 369 - // If no context or structurally invalid, skip constraint validation 370 - if !self.is_structurally_valid() || self.data.is_none() || self.schema_ref.is_none() { 371 - return Vec::new(); 372 - } 373 - 374 - let data = self.data.as_ref().unwrap(); 375 - let (nsid, def_name) = self.schema_ref.as_ref().unwrap(); 376 - 377 - let mut path = ValidationPath::new(); 378 - validate_constraints( 379 - &mut path, 380 - data, 381 - nsid.as_str(), 382 - def_name.as_str(), 383 - self.registry.as_ref(), 384 - ) 385 - }) 333 + match self { 334 + ValidationResult::StructuralOnly { .. } => &[], 335 + ValidationResult::Complete { constraints, .. } => constraints, 336 + } 386 337 } 387 338 388 339 /// Check if there are any constraint violations ··· 392 343 393 344 /// Get all errors (structural and constraint) 394 345 pub fn all_errors(&self) -> impl Iterator<Item = ValidationError> + '_ { 395 - self.structural 346 + self.structural_errors() 396 347 .iter() 397 348 .cloned() 398 349 .map(ValidationError::Structural) ··· 431 382 } 432 383 } 433 384 434 - /// Validate data against a schema 385 + /// Validate data against a schema (structural and constraints) 435 386 /// 436 - /// Results are cached by content hash for efficiency. 387 + /// Performs both structural validation (types, required fields) and constraint 388 + /// validation (max_length, ranges, etc.). Results are cached by content hash. 437 389 pub fn validate<T: crate::schema::LexiconSchema>( 438 390 &self, 439 391 data: &Data, ··· 455 407 Ok(result) 456 408 } 457 409 410 + /// Validate only the structural aspects of data against a schema 411 + /// 412 + /// Only checks types, required fields, and schema structure. 
Does not check 413 + /// constraints like max_length, ranges, etc. This is faster when you only 414 + /// care about type correctness. 415 + pub fn validate_structural<T: crate::schema::LexiconSchema>( 416 + &self, 417 + data: &Data, 418 + ) -> ValidationResult { 419 + self.validate_structural_uncached::<T>(data) 420 + } 421 + 458 422 /// Validate without caching (internal) 459 423 fn validate_uncached<T: crate::schema::LexiconSchema>(&self, data: &Data) -> ValidationResult { 460 424 let def = match self.registry.get_def(T::nsid(), T::def_name()) { 461 425 Some(d) => d, 462 426 None => { 463 427 // Schema not found - this is a structural error 464 - return ValidationResult::with_structural_errors(vec![ 465 - StructuralError::UnresolvedRef { 428 + return ValidationResult::StructuralOnly { 429 + structural: vec![StructuralError::UnresolvedRef { 430 + path: ValidationPath::new(), 431 + ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 432 + }], 433 + }; 434 + } 435 + }; 436 + 437 + let mut path = ValidationPath::new(); 438 + let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 439 + 440 + let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 441 + 442 + // If structurally invalid, return structural errors only 443 + if !structural_errors.is_empty() { 444 + return ValidationResult::StructuralOnly { 445 + structural: structural_errors, 446 + }; 447 + } 448 + 449 + // Structurally valid - compute constraints eagerly 450 + let mut path = ValidationPath::new(); 451 + let constraint_errors = validate_constraints( 452 + &mut path, 453 + data, 454 + T::nsid(), 455 + T::def_name(), 456 + Some(&Arc::new(self.registry.clone())), 457 + ); 458 + 459 + ValidationResult::Complete { 460 + structural: structural_errors, 461 + constraints: constraint_errors, 462 + } 463 + } 464 + 465 + /// Validate structural aspects only without caching (internal) 466 + fn validate_structural_uncached<T: crate::schema::LexiconSchema>( 467 + &self, 
468 + data: &Data, 469 + ) -> ValidationResult { 470 + let def = match self.registry.get_def(T::nsid(), T::def_name()) { 471 + Some(d) => d, 472 + None => { 473 + // Schema not found - this is a structural error 474 + return ValidationResult::StructuralOnly { 475 + structural: vec![StructuralError::UnresolvedRef { 466 476 path: ValidationPath::new(), 467 477 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 468 - }, 469 - ]); 478 + }], 479 + }; 470 480 } 471 481 }; 472 482 473 483 let mut path = ValidationPath::new(); 474 484 let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 475 485 476 - let errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 486 + let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 477 487 478 - // If structurally valid, create result with context for lazy constraint validation 479 - if errors.is_empty() { 480 - // Convert data to owned for constraint validation 481 - let owned_data = Arc::new(data.clone().into_static()); 482 - ValidationResult::with_context( 483 - errors, 484 - owned_data, 485 - SmolStr::new_static(T::nsid()), 486 - SmolStr::new_static(T::def_name()), 487 - Arc::new(self.registry.clone()), 488 - ) 489 - } else { 490 - ValidationResult::with_structural_errors(errors) 488 + ValidationResult::StructuralOnly { 489 + structural: structural_errors, 491 490 } 492 491 } 493 492
+56
crates/jacquard-lexicon/src/validation/tests.rs
··· 1123 1123 assert_eq!(result.structural_errors().len(), 0); 1124 1124 assert!(result.constraint_errors().len() > 0); 1125 1125 } 1126 + 1127 + #[test] 1128 + fn test_validate_structural_only() { 1129 + let validator = SchemaValidator::new(); 1130 + validator.registry().insert( 1131 + "test.string.constraints".to_smolstr(), 1132 + StringConstraintSchema::lexicon_doc(), 1133 + ); 1134 + 1135 + // String too long (violates constraints) 1136 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 1137 + "text".into(), 1138 + data_string("this string is way too long"), 1139 + )]))); 1140 + 1141 + // Use structural validation only 1142 + let result = validator.validate_structural::<StringConstraintSchema>(&data); 1143 + 1144 + // Structurally valid - type is correct, required field present 1145 + assert!(result.is_structurally_valid()); 1146 + 1147 + // No constraint errors computed 1148 + assert_eq!(result.constraint_errors().len(), 0); 1149 + 1150 + // Result should be StructuralOnly variant 1151 + match result { 1152 + ValidationResult::StructuralOnly { .. } => {} 1153 + ValidationResult::Complete { .. 
} => panic!("Expected StructuralOnly variant"), 1154 + } 1155 + } 1156 + 1157 + #[test] 1158 + fn test_validate_structural_only_with_errors() { 1159 + let validator = SchemaValidator::new(); 1160 + validator.registry().insert( 1161 + "test.string.constraints".to_smolstr(), 1162 + StringConstraintSchema::lexicon_doc(), 1163 + ); 1164 + 1165 + // Structurally invalid: integer instead of string 1166 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 1167 + "text".into(), 1168 + Data::Integer(42), 1169 + )]))); 1170 + 1171 + let result = validator.validate_structural::<StringConstraintSchema>(&data); 1172 + 1173 + // Not structurally valid 1174 + assert!(!result.is_structurally_valid()); 1175 + 1176 + // Structural errors should be present 1177 + assert_eq!(result.structural_errors().len(), 1); 1178 + 1179 + // No constraint errors 1180 + assert_eq!(result.constraint_errors().len(), 0); 1181 + }