A better Rust ATProto crate

code->lexicon stage 1

Orual 9f1fd63c 7c7de622

+1308
+2
crates/jacquard-lexicon/src/lib.rs
··· 9 9 //! - [`codegen`] - Rust code generation from parsed schemas 10 10 //! - [`corpus`] - Lexicon corpus management and namespace organization 11 11 //! - [`lexicon`] - Schema parsing and validation 12 + //! - [`schema`] - Schema generation from Rust types (reverse codegen) 12 13 //! - [`union_registry`] - Tracks union types for collision detection 13 14 //! - [`fs`] - Filesystem utilities for lexicon storage 14 15 ··· 17 18 pub mod error; 18 19 pub mod fs; 19 20 pub mod lexicon; 21 + pub mod schema; 20 22 pub mod union_registry;
+378
crates/jacquard-lexicon/src/schema.rs
··· 1 + //! # Lexicon Schema Generation 2 + //! 3 + //! This module provides traits and utilities for generating AT Protocol lexicon schemas 4 + //! from Rust types. This is the reverse direction from the usual lexicon→Rust codegen. 5 + //! 6 + //! ## Use Cases 7 + //! 8 + //! - **Rapid prototyping**: Define types in Rust, generate schemas automatically 9 + //! - **Custom lexicons**: Easy creation of third-party lexicons 10 + //! - **Documentation**: Keep types and schemas in sync 11 + //! - **Runtime introspection**: Access schema metadata at runtime 12 + //! 13 + //! ## Usage 14 + //! 15 + //! ### Manual Implementation 16 + //! 17 + //! Implement the `LexiconSchema` trait for your types: 18 + //! 19 + //! ```rust 20 + //! # use jacquard_lexicon::schema::{LexiconSchema, LexiconGenerator}; 21 + //! # use jacquard_lexicon::lexicon::LexiconDoc; 22 + //! struct MyType; 23 + //! 24 + //! impl LexiconSchema for MyType { 25 + //! fn nsid() -> &'static str { 26 + //! "com.example.myType" 27 + //! } 28 + //! 29 + //! fn lexicon_doc(generator: &mut LexiconGenerator) -> LexiconDoc<'static> { 30 + //! // Build schema using generator or builder API 31 + //! todo!() 32 + //! } 33 + //! } 34 + //! ``` 35 + //! 36 + //! ### Derive Macro (Future) 37 + //! 38 + //! In Phase 2, a derive macro will automate trait implementation: 39 + //! 40 + //! ```rust,ignore 41 + //! #[derive(LexiconSchema)] 42 + //! #[lexicon(nsid = "app.bsky.feed.post", record, key = "tid")] 43 + //! struct Post<'a> { 44 + //! #[lexicon(max_graphemes = 300, max_length = 3000)] 45 + //! text: CowStr<'a>, 46 + //! created_at: Datetime, 47 + //! } 48 + //! ``` 49 + //! 50 + //! ## Design Pattern 51 + //! 52 + //! The design follows `schemars` crate patterns: 53 + //! 54 + //! - **Trait-based**: Types implement `LexiconSchema` trait 55 + //! - **Generator pattern**: `LexiconGenerator` tracks refs/defs 56 + //! - **Inline vs refs**: Types control whether they inline or use refs 57 + //! 
- **Validation**: Runtime constraint checking via `validate()` method 58 + 59 + pub mod builder; 60 + pub mod type_mapping; 61 + 62 + use crate::lexicon::{Lexicon, LexiconDoc, LexObjectProperty, LexRef, LexUserType}; 63 + use jacquard_common::smol_str::SmolStr; 64 + use std::borrow::Cow; 65 + use std::collections::{BTreeMap, HashSet}; 66 + 67 + /// Trait for types that can generate lexicon schemas 68 + pub trait LexiconSchema { 69 + /// The NSID for this type's primary definition 70 + /// 71 + /// For fragments, this is the base NSID (without `#fragment`). 72 + fn nsid() -> &'static str; 73 + 74 + /// The schema ID for this type 75 + /// 76 + /// Defaults to NSID. Override for fragments to include `#fragment` suffix. 77 + fn schema_id() -> Cow<'static, str> { 78 + Cow::Borrowed(Self::nsid()) 79 + } 80 + 81 + /// Whether this type should be inlined vs referenced 82 + /// 83 + /// - `false` (default): Type becomes a def, references use `{"type": "ref", "ref": "nsid"}` 84 + /// - `true`: Type's schema is inlined directly into parent 85 + /// 86 + /// Recursive types MUST return `false` to avoid infinite expansion. 87 + fn inline_schema() -> bool { 88 + false 89 + } 90 + 91 + /// Generate the lexicon document for this type 92 + /// 93 + /// Called by generator to build complete schema. Use `generator` to handle 94 + /// nested types and refs. 95 + fn lexicon_doc(generator: &mut LexiconGenerator) -> LexiconDoc<'static>; 96 + 97 + /// Validate an instance against lexicon constraints 98 + /// 99 + /// Checks runtime constraints like `max_length`, `max_graphemes`, `minimum`, etc. 100 + /// Returns `Ok(())` if valid, `Err` with details if invalid. 
101 + fn validate(&self) -> Result<(), ValidationError> { 102 + // Default impl: no constraints to check 103 + Ok(()) 104 + } 105 + } 106 + 107 + /// Error type for validation failures 108 + #[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 109 + pub enum ValidationError { 110 + #[error("field `{field}` exceeds maximum length: {actual} > {max}")] 111 + MaxLength { 112 + field: &'static str, 113 + max: usize, 114 + actual: usize, 115 + }, 116 + 117 + #[error("field `{field}` exceeds maximum grapheme count: {actual} > {max}")] 118 + MaxGraphemes { 119 + field: &'static str, 120 + max: usize, 121 + actual: usize, 122 + }, 123 + 124 + #[error("field `{field}` below minimum length: {actual} < {min}")] 125 + MinLength { 126 + field: &'static str, 127 + min: usize, 128 + actual: usize, 129 + }, 130 + 131 + #[error("field `{field}` below minimum grapheme count: {actual} < {min}")] 132 + MinGraphemes { 133 + field: &'static str, 134 + min: usize, 135 + actual: usize, 136 + }, 137 + 138 + #[error("field `{field}` value {actual} exceeds maximum: {max}")] 139 + Maximum { 140 + field: &'static str, 141 + max: i64, 142 + actual: i64, 143 + }, 144 + 145 + #[error("field `{field}` value {actual} below minimum: {min}")] 146 + Minimum { 147 + field: &'static str, 148 + min: i64, 149 + actual: i64, 150 + }, 151 + 152 + #[error("field `{field}` has invalid value: {message}")] 153 + InvalidValue { 154 + field: &'static str, 155 + message: String, 156 + }, 157 + 158 + #[error("multiple validation errors: {0:?}")] 159 + Multiple(Vec<ValidationError>), 160 + } 161 + 162 + /// Generator for lexicon schemas 163 + /// 164 + /// Tracks seen types and manages refs vs inline decisions. 165 + /// Similar pattern to `schemars::SchemaGenerator`. 
166 + pub struct LexiconGenerator { 167 + /// Root NSID for the primary type being generated 168 + root_nsid: SmolStr, 169 + 170 + /// Collected definitions (def_name -> LexUserType) 171 + defs: BTreeMap<SmolStr, LexUserType<'static>>, 172 + 173 + /// Types we've seen (prevents duplicate defs) 174 + seen_types: HashSet<SmolStr>, 175 + 176 + /// Stack of types currently being generated (cycle detection) 177 + generation_stack: Vec<SmolStr>, 178 + } 179 + 180 + impl LexiconGenerator { 181 + /// Create a new generator for a primary type 182 + pub fn new(root_nsid: impl Into<SmolStr>) -> Self { 183 + Self { 184 + root_nsid: root_nsid.into(), 185 + defs: BTreeMap::new(), 186 + seen_types: HashSet::new(), 187 + generation_stack: Vec::new(), 188 + } 189 + } 190 + 191 + /// Generate schema for a type that implements LexiconSchema 192 + /// 193 + /// Returns either a ref property or an inline object property depending on `inline_schema()`. 194 + /// Also adds any defs to the generator's collection. 
195 + pub fn generate_for<T: LexiconSchema>( 196 + &mut self, 197 + ) -> Result<LexObjectProperty<'static>, GeneratorError> { 198 + let schema_id = T::schema_id(); 199 + 200 + // Check for cycles 201 + if self.generation_stack.contains(&schema_id.as_ref().into()) { 202 + return Err(GeneratorError::RecursiveType { 203 + type_name: schema_id.to_string(), 204 + stack: self.generation_stack.clone(), 205 + }); 206 + } 207 + 208 + // If we've seen this type and it's not inline, return a ref 209 + if !T::inline_schema() && self.seen_types.contains(schema_id.as_ref()) { 210 + return Ok(self.make_ref_property(schema_id.as_ref())); 211 + } 212 + 213 + // Mark as seen and add to stack 214 + self.seen_types.insert(schema_id.to_string().into()); 215 + self.generation_stack.push(schema_id.to_string().into()); 216 + 217 + // Generate the schema 218 + let doc = T::lexicon_doc(self); 219 + 220 + // Pop from stack 221 + self.generation_stack.pop(); 222 + 223 + // If inline, extract the main def and convert to property 224 + if T::inline_schema() { 225 + // Find the main def and convert to property type 226 + if let Some(def) = doc.defs.get("main") { 227 + // Convert LexUserType to LexObjectProperty 228 + match def { 229 + LexUserType::Object(obj) => Ok(LexObjectProperty::Object(obj.clone())), 230 + LexUserType::String(s) => Ok(LexObjectProperty::String(s.clone())), 231 + LexUserType::Array(a) => Ok(LexObjectProperty::Array(a.clone())), 232 + LexUserType::Boolean(b) => Ok(LexObjectProperty::Boolean(b.clone())), 233 + LexUserType::Integer(i) => Ok(LexObjectProperty::Integer(i.clone())), 234 + LexUserType::Blob(b) => Ok(LexObjectProperty::Blob(b.clone())), 235 + LexUserType::Bytes(b) => Ok(LexObjectProperty::Bytes(b.clone())), 236 + LexUserType::CidLink(c) => Ok(LexObjectProperty::CidLink(c.clone())), 237 + LexUserType::Unknown(u) => Ok(LexObjectProperty::Unknown(u.clone())), 238 + // Types that cannot be inlined as properties 239 + _ => Err(GeneratorError::InvalidInlineType { 240 + 
type_name: schema_id.to_string(), 241 + user_type: format!("{:?}", def), 242 + }), 243 + } 244 + } else { 245 + Err(GeneratorError::EmptyDoc { 246 + type_name: schema_id.to_string(), 247 + }) 248 + } 249 + } else { 250 + // Not inline - merge defs into our collection and return ref 251 + for (def_name, def) in doc.defs { 252 + self.defs.insert(def_name, def); 253 + } 254 + Ok(self.make_ref_property(schema_id.as_ref())) 255 + } 256 + } 257 + 258 + /// Create a ref property to another type 259 + /// 260 + /// Returns a LexObjectProperty::Ref for use in object properties or array items. 261 + pub fn make_ref_property(&self, ref_nsid: &str) -> LexObjectProperty<'static> { 262 + LexObjectProperty::Ref(LexRef { 263 + description: None, 264 + r#ref: ref_nsid.to_string().into(), 265 + }) 266 + } 267 + 268 + /// Build the final lexicon document 269 + pub fn into_doc(self) -> LexiconDoc<'static> { 270 + LexiconDoc { 271 + lexicon: Lexicon::Lexicon1, 272 + id: self.root_nsid.into(), 273 + revision: None, 274 + description: None, 275 + defs: self.defs, 276 + } 277 + } 278 + 279 + /// Add a def directly (for manual construction) 280 + pub fn add_def(&mut self, name: impl Into<SmolStr>, def: LexUserType<'static>) { 281 + self.defs.insert(name.into(), def); 282 + } 283 + 284 + /// Get the root NSID 285 + pub fn root_nsid(&self) -> &str { 286 + &self.root_nsid 287 + } 288 + } 289 + 290 + /// Errors from lexicon generation 291 + #[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 292 + pub enum GeneratorError { 293 + #[error("recursive type detected: {type_name}")] 294 + RecursiveType { 295 + type_name: String, 296 + stack: Vec<SmolStr>, 297 + }, 298 + 299 + #[error("type {type_name} generated empty document")] 300 + EmptyDoc { type_name: String }, 301 + 302 + #[error("type {type_name} marked as inline but main def type cannot be inlined: {user_type}")] 303 + InvalidInlineType { 304 + type_name: String, 305 + user_type: String, 306 + }, 307 + 308 + #[error("invalid NSID: 
{nsid}")] 309 + InvalidNsid { nsid: String }, 310 + } 311 + 312 + #[cfg(test)] 313 + mod tests { 314 + use super::*; 315 + use crate::lexicon::{ 316 + LexBoolean, LexInteger, LexObject, LexRecord, LexRecordRecord, LexString, 317 + }; 318 + 319 + #[test] 320 + fn test_generator_simple() { 321 + let mut generator = LexiconGenerator::new("app.example.test"); 322 + 323 + // Add a simple record 324 + generator.add_def( 325 + "main", 326 + LexUserType::Record(LexRecord { 327 + description: Some("Test record".into()), 328 + key: Some("tid".into()), 329 + record: LexRecordRecord::Object(LexObject { 330 + description: None, 331 + required: Some(vec!["field1".into()]), 332 + nullable: None, 333 + properties: [( 334 + "field1".into(), 335 + LexObjectProperty::String(LexString { 336 + description: None, 337 + format: None, 338 + default: None, 339 + min_length: None, 340 + max_length: None, 341 + min_graphemes: None, 342 + max_graphemes: None, 343 + r#enum: None, 344 + r#const: None, 345 + known_values: None, 346 + }), 347 + )] 348 + .into(), 349 + }), 350 + }), 351 + ); 352 + 353 + let doc = generator.into_doc(); 354 + assert_eq!(doc.id.as_ref(), "app.example.test"); 355 + assert_eq!(doc.defs.len(), 1); 356 + assert!(doc.defs.contains_key("main")); 357 + } 358 + 359 + #[test] 360 + fn test_validation_max_length() { 361 + let err = ValidationError::MaxLength { 362 + field: "text", 363 + max: 100, 364 + actual: 150, 365 + }; 366 + assert!(err.to_string().contains("exceeds maximum length")); 367 + } 368 + 369 + #[test] 370 + fn test_validation_max_graphemes() { 371 + let err = ValidationError::MaxGraphemes { 372 + field: "text", 373 + max: 50, 374 + actual: 75, 375 + }; 376 + assert!(err.to_string().contains("exceeds maximum grapheme count")); 377 + } 378 + }
+530
crates/jacquard-lexicon/src/schema/builder.rs
··· 1 + //! Builder API for manual lexicon schema construction 2 + //! 3 + //! Provides ergonomic API for building lexicon documents without implementing the trait. 4 + //! Useful for prototyping, testing, and dynamic schema generation. 5 + 6 + use crate::lexicon::{ 7 + LexArray, LexArrayItem, LexBoolean, LexInteger, LexObject, LexObjectProperty, LexRecord, 8 + LexRecordRecord, LexRef, LexString, LexStringFormat, LexUserType, LexXrpcBody, 9 + LexXrpcBodySchema, LexXrpcError, LexXrpcParameters, LexXrpcParametersProperty, LexXrpcQuery, 10 + LexXrpcQueryParameter, Lexicon, LexiconDoc, 11 + }; 12 + use jacquard_common::CowStr; 13 + use jacquard_common::smol_str::SmolStr; 14 + use std::collections::BTreeMap; 15 + 16 + /// Builder for lexicon documents 17 + pub struct LexiconDocBuilder { 18 + nsid: SmolStr, 19 + description: Option<CowStr<'static>>, 20 + defs: BTreeMap<SmolStr, LexUserType<'static>>, 21 + } 22 + 23 + impl LexiconDocBuilder { 24 + /// Start building a lexicon document 25 + pub fn new(nsid: impl Into<SmolStr>) -> Self { 26 + Self { 27 + nsid: nsid.into(), 28 + description: None, 29 + defs: BTreeMap::new(), 30 + } 31 + } 32 + 33 + /// Set document description 34 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 35 + self.description = Some(desc.into()); 36 + self 37 + } 38 + 39 + /// Add a record def (becomes "main") 40 + pub fn record(self) -> RecordBuilder { 41 + RecordBuilder { 42 + doc_builder: self, 43 + key: None, 44 + description: None, 45 + properties: BTreeMap::new(), 46 + required: Vec::new(), 47 + } 48 + } 49 + 50 + /// Add an object def 51 + pub fn object(self, name: impl Into<SmolStr>) -> ObjectBuilder { 52 + ObjectBuilder { 53 + doc_builder: self, 54 + def_name: name.into(), 55 + description: None, 56 + properties: BTreeMap::new(), 57 + required: Vec::new(), 58 + } 59 + } 60 + 61 + /// Add a query def 62 + pub fn query(self) -> QueryBuilder { 63 + QueryBuilder { 64 + doc_builder: self, 65 + description: None, 66 + 
parameters: BTreeMap::new(), 67 + required_params: Vec::new(), 68 + output: None, 69 + errors: Vec::new(), 70 + } 71 + } 72 + 73 + /// Build the final document 74 + pub fn build(self) -> LexiconDoc<'static> { 75 + LexiconDoc { 76 + lexicon: Lexicon::Lexicon1, 77 + id: self.nsid.into(), 78 + revision: None, 79 + description: self.description, 80 + defs: self.defs, 81 + } 82 + } 83 + } 84 + 85 + pub struct RecordBuilder { 86 + doc_builder: LexiconDocBuilder, 87 + key: Option<CowStr<'static>>, 88 + description: Option<CowStr<'static>>, 89 + properties: BTreeMap<SmolStr, LexObjectProperty<'static>>, 90 + required: Vec<SmolStr>, 91 + } 92 + 93 + impl RecordBuilder { 94 + /// Set record key type (e.g., "tid") 95 + pub fn key(mut self, key: impl Into<CowStr<'static>>) -> Self { 96 + self.key = Some(key.into()); 97 + self 98 + } 99 + 100 + /// Set description 101 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 102 + self.description = Some(desc.into()); 103 + self 104 + } 105 + 106 + /// Add a field 107 + pub fn field<F>(mut self, name: impl Into<SmolStr>, builder: F) -> Self 108 + where 109 + F: FnOnce(FieldBuilder) -> FieldBuilder, 110 + { 111 + let field_builder = FieldBuilder::new(); 112 + let field_builder = builder(field_builder); 113 + 114 + let name = name.into(); 115 + if field_builder.required { 116 + self.required.push(name.clone()); 117 + } 118 + 119 + self.properties.insert(name, field_builder.build()); 120 + self 121 + } 122 + 123 + /// Build and add to document 124 + pub fn build(mut self) -> LexiconDocBuilder { 125 + let record_obj = LexObject { 126 + description: self.description, 127 + required: if self.required.is_empty() { 128 + None 129 + } else { 130 + Some(self.required) 131 + }, 132 + nullable: None, 133 + properties: self.properties, 134 + }; 135 + 136 + let record = LexRecord { 137 + description: None, 138 + key: self.key, 139 + record: LexRecordRecord::Object(record_obj), 140 + }; 141 + 142 + self.doc_builder 143 + 
.defs 144 + .insert("main".into(), LexUserType::Record(record)); 145 + self.doc_builder 146 + } 147 + } 148 + 149 + pub struct ObjectBuilder { 150 + doc_builder: LexiconDocBuilder, 151 + def_name: SmolStr, 152 + description: Option<CowStr<'static>>, 153 + properties: BTreeMap<SmolStr, LexObjectProperty<'static>>, 154 + required: Vec<SmolStr>, 155 + } 156 + 157 + impl ObjectBuilder { 158 + /// Set description 159 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 160 + self.description = Some(desc.into()); 161 + self 162 + } 163 + 164 + /// Add a field 165 + pub fn field<F>(mut self, name: impl Into<SmolStr>, builder: F) -> Self 166 + where 167 + F: FnOnce(FieldBuilder) -> FieldBuilder, 168 + { 169 + let field_builder = FieldBuilder::new(); 170 + let field_builder = builder(field_builder); 171 + 172 + let name = name.into(); 173 + if field_builder.required { 174 + self.required.push(name.clone()); 175 + } 176 + 177 + self.properties.insert(name, field_builder.build()); 178 + self 179 + } 180 + 181 + /// Build and add to document 182 + pub fn build(mut self) -> LexiconDocBuilder { 183 + let object = LexObject { 184 + description: self.description, 185 + required: if self.required.is_empty() { 186 + None 187 + } else { 188 + Some(self.required) 189 + }, 190 + nullable: None, 191 + properties: self.properties, 192 + }; 193 + 194 + self.doc_builder 195 + .defs 196 + .insert(self.def_name, LexUserType::Object(object)); 197 + self.doc_builder 198 + } 199 + } 200 + 201 + pub struct QueryBuilder { 202 + doc_builder: LexiconDocBuilder, 203 + description: Option<CowStr<'static>>, 204 + parameters: BTreeMap<SmolStr, LexXrpcParametersProperty<'static>>, 205 + required_params: Vec<SmolStr>, 206 + output: Option<LexXrpcBody<'static>>, 207 + errors: Vec<LexXrpcError<'static>>, 208 + } 209 + 210 + impl QueryBuilder { 211 + /// Set description 212 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 213 + self.description = 
Some(desc.into()); 214 + self 215 + } 216 + 217 + /// Add a string parameter 218 + pub fn param_string(mut self, name: impl Into<SmolStr>, required: bool) -> Self { 219 + let param = LexXrpcParametersProperty::String(LexString { 220 + description: None, 221 + format: None, 222 + default: None, 223 + min_length: None, 224 + max_length: None, 225 + min_graphemes: None, 226 + max_graphemes: None, 227 + r#enum: None, 228 + r#const: None, 229 + known_values: None, 230 + }); 231 + 232 + let name = name.into(); 233 + if required { 234 + self.required_params.push(name.clone()); 235 + } 236 + self.parameters.insert(name, param); 237 + self 238 + } 239 + 240 + /// Set output schema 241 + pub fn output( 242 + mut self, 243 + encoding: impl Into<CowStr<'static>>, 244 + schema: LexXrpcBodySchema<'static>, 245 + ) -> Self { 246 + self.output = Some(LexXrpcBody { 247 + description: None, 248 + encoding: encoding.into(), 249 + schema: Some(schema), 250 + }); 251 + self 252 + } 253 + 254 + /// Build and add to document 255 + pub fn build(mut self) -> LexiconDocBuilder { 256 + let params = if self.parameters.is_empty() { 257 + None 258 + } else { 259 + Some(LexXrpcQueryParameter::Params(LexXrpcParameters { 260 + description: None, 261 + required: if self.required_params.is_empty() { 262 + None 263 + } else { 264 + Some(self.required_params) 265 + }, 266 + properties: self.parameters, 267 + })) 268 + }; 269 + 270 + let query = LexXrpcQuery { 271 + description: self.description, 272 + parameters: params, 273 + output: self.output, 274 + errors: if self.errors.is_empty() { 275 + None 276 + } else { 277 + Some(self.errors) 278 + }, 279 + }; 280 + 281 + self.doc_builder 282 + .defs 283 + .insert("main".into(), LexUserType::XrpcQuery(query)); 284 + self.doc_builder 285 + } 286 + } 287 + 288 + pub struct FieldBuilder { 289 + property: Option<LexObjectProperty<'static>>, 290 + required: bool, 291 + } 292 + 293 + impl FieldBuilder { 294 + fn new() -> Self { 295 + Self { 296 + property: None, 
297 + required: false, 298 + } 299 + } 300 + 301 + /// Mark field as required 302 + pub fn required(mut self) -> Self { 303 + self.required = true; 304 + self 305 + } 306 + 307 + /// String field 308 + pub fn string(self) -> StringFieldBuilder { 309 + StringFieldBuilder { 310 + field_builder: self, 311 + format: None, 312 + max_length: None, 313 + max_graphemes: None, 314 + min_length: None, 315 + min_graphemes: None, 316 + description: None, 317 + } 318 + } 319 + 320 + /// Integer field 321 + pub fn integer(self) -> IntegerFieldBuilder { 322 + IntegerFieldBuilder { 323 + field_builder: self, 324 + minimum: None, 325 + maximum: None, 326 + description: None, 327 + } 328 + } 329 + 330 + /// Boolean field 331 + pub fn boolean(mut self) -> Self { 332 + self.property = Some(LexObjectProperty::Boolean(LexBoolean { 333 + description: None, 334 + default: None, 335 + r#const: None, 336 + })); 337 + self 338 + } 339 + 340 + /// Ref field (to another type) 341 + pub fn ref_to(mut self, ref_nsid: impl Into<CowStr<'static>>) -> Self { 342 + self.property = Some(LexObjectProperty::Ref(LexRef { 343 + description: None, 344 + r#ref: ref_nsid.into(), 345 + })); 346 + self 347 + } 348 + 349 + /// Array field 350 + pub fn array<F>(mut self, item_builder: F) -> Self 351 + where 352 + F: FnOnce(ArrayItemBuilder) -> ArrayItemBuilder, 353 + { 354 + let builder = ArrayItemBuilder::new(); 355 + let builder = item_builder(builder); 356 + self.property = Some(LexObjectProperty::Array(builder.build())); 357 + self 358 + } 359 + 360 + pub fn build(self) -> LexObjectProperty<'static> { 361 + self.property.expect("field type not set") 362 + } 363 + } 364 + 365 + pub struct StringFieldBuilder { 366 + field_builder: FieldBuilder, 367 + format: Option<LexStringFormat>, 368 + max_length: Option<usize>, 369 + max_graphemes: Option<usize>, 370 + min_length: Option<usize>, 371 + min_graphemes: Option<usize>, 372 + description: Option<CowStr<'static>>, 373 + } 374 + 375 + impl StringFieldBuilder { 376 
+ pub fn format(mut self, format: LexStringFormat) -> Self { 377 + self.format = Some(format); 378 + self 379 + } 380 + 381 + pub fn max_length(mut self, max: usize) -> Self { 382 + self.max_length = Some(max); 383 + self 384 + } 385 + 386 + pub fn max_graphemes(mut self, max: usize) -> Self { 387 + self.max_graphemes = Some(max); 388 + self 389 + } 390 + 391 + pub fn min_length(mut self, min: usize) -> Self { 392 + self.min_length = Some(min); 393 + self 394 + } 395 + 396 + pub fn min_graphemes(mut self, min: usize) -> Self { 397 + self.min_graphemes = Some(min); 398 + self 399 + } 400 + 401 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 402 + self.description = Some(desc.into()); 403 + self 404 + } 405 + 406 + pub fn required(mut self) -> Self { 407 + self.field_builder.required = true; 408 + self 409 + } 410 + 411 + pub fn build(mut self) -> FieldBuilder { 412 + self.field_builder.property = Some(LexObjectProperty::String(LexString { 413 + description: self.description, 414 + format: self.format, 415 + default: None, 416 + min_length: self.min_length, 417 + max_length: self.max_length, 418 + min_graphemes: self.min_graphemes, 419 + max_graphemes: self.max_graphemes, 420 + r#enum: None, 421 + r#const: None, 422 + known_values: None, 423 + })); 424 + self.field_builder 425 + } 426 + } 427 + 428 + pub struct IntegerFieldBuilder { 429 + field_builder: FieldBuilder, 430 + minimum: Option<i64>, 431 + maximum: Option<i64>, 432 + description: Option<CowStr<'static>>, 433 + } 434 + 435 + impl IntegerFieldBuilder { 436 + pub fn minimum(mut self, min: i64) -> Self { 437 + self.minimum = Some(min); 438 + self 439 + } 440 + 441 + pub fn maximum(mut self, max: i64) -> Self { 442 + self.maximum = Some(max); 443 + self 444 + } 445 + 446 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 447 + self.description = Some(desc.into()); 448 + self 449 + } 450 + 451 + pub fn build(mut self) -> FieldBuilder { 452 + 
self.field_builder.property = Some(LexObjectProperty::Integer(LexInteger { 453 + description: self.description, 454 + default: None, 455 + minimum: self.minimum, 456 + maximum: self.maximum, 457 + r#enum: None, 458 + r#const: None, 459 + })); 460 + self.field_builder 461 + } 462 + } 463 + 464 + pub struct ArrayItemBuilder { 465 + item: Option<LexArrayItem<'static>>, 466 + description: Option<CowStr<'static>>, 467 + min_length: Option<usize>, 468 + max_length: Option<usize>, 469 + } 470 + 471 + impl ArrayItemBuilder { 472 + fn new() -> Self { 473 + Self { 474 + item: None, 475 + description: None, 476 + min_length: None, 477 + max_length: None, 478 + } 479 + } 480 + 481 + pub fn description(mut self, desc: impl Into<CowStr<'static>>) -> Self { 482 + self.description = Some(desc.into()); 483 + self 484 + } 485 + 486 + pub fn min_length(mut self, min: usize) -> Self { 487 + self.min_length = Some(min); 488 + self 489 + } 490 + 491 + pub fn max_length(mut self, max: usize) -> Self { 492 + self.max_length = Some(max); 493 + self 494 + } 495 + 496 + /// String items 497 + pub fn string_items(mut self) -> Self { 498 + self.item = Some(LexArrayItem::String(LexString { 499 + description: None, 500 + format: None, 501 + default: None, 502 + min_length: None, 503 + max_length: None, 504 + min_graphemes: None, 505 + max_graphemes: None, 506 + r#enum: None, 507 + r#const: None, 508 + known_values: None, 509 + })); 510 + self 511 + } 512 + 513 + /// Ref items 514 + pub fn ref_items(mut self, ref_nsid: impl Into<CowStr<'static>>) -> Self { 515 + self.item = Some(LexArrayItem::Ref(LexRef { 516 + description: None, 517 + r#ref: ref_nsid.into(), 518 + })); 519 + self 520 + } 521 + 522 + fn build(self) -> LexArray<'static> { 523 + LexArray { 524 + description: self.description, 525 + items: self.item.expect("array item type not set"), 526 + min_length: self.min_length, 527 + max_length: self.max_length, 528 + } 529 + } 530 + }
+176
crates/jacquard-lexicon/src/schema/type_mapping.rs
··· 1 + //! Type mapping utilities for converting Rust types to lexicon primitives 2 + //! 3 + //! These utilities parse Rust types using `syn` to determine their lexicon equivalents. 4 + //! Used by the derive macro in Phase 2. 5 + 6 + use syn; 7 + 8 + /// Detect the lexicon type for a Rust type path 9 + /// 10 + /// Used by derive macro to map field types to lexicon primitives. 11 + pub fn rust_type_to_lexicon_type(ty: &syn::Type) -> Option<LexiconPrimitiveType> { 12 + match ty { 13 + syn::Type::Path(type_path) => { 14 + let path = &type_path.path; 15 + let last_segment = path.segments.last()?; 16 + 17 + match last_segment.ident.to_string().as_str() { 18 + // Boolean types 19 + "bool" => Some(LexiconPrimitiveType::Boolean), 20 + 21 + // Integer types (lexicon integers are i64) 22 + "i8" | "i16" | "i32" | "i64" | "isize" => Some(LexiconPrimitiveType::Integer), 23 + // Note: unsigned types not directly supported by lexicon spec 24 + // Users should use i64 or cast to i64 25 + "u8" | "u16" | "u32" | "u64" | "usize" => Some(LexiconPrimitiveType::Integer), 26 + 27 + // String types (Rust primitives) 28 + "String" | "str" => Some(LexiconPrimitiveType::String(StringFormat::Plain)), 29 + 30 + // jacquard string types 31 + "CowStr" | "SmolStr" => Some(LexiconPrimitiveType::String(StringFormat::Plain)), 32 + "Did" => Some(LexiconPrimitiveType::String(StringFormat::Did)), 33 + "Handle" => Some(LexiconPrimitiveType::String(StringFormat::Handle)), 34 + "AtUri" => Some(LexiconPrimitiveType::String(StringFormat::AtUri)), 35 + "Nsid" => Some(LexiconPrimitiveType::String(StringFormat::Nsid)), 36 + "Cid" => Some(LexiconPrimitiveType::String(StringFormat::Cid)), 37 + "Datetime" => Some(LexiconPrimitiveType::String(StringFormat::Datetime)), 38 + "Language" => Some(LexiconPrimitiveType::String(StringFormat::Language)), 39 + "Tid" => Some(LexiconPrimitiveType::String(StringFormat::Tid)), 40 + "RecordKey" => Some(LexiconPrimitiveType::String(StringFormat::RecordKey)), 41 + 42 + // IPLD 
types 43 + "Bytes" if is_bytes_type(path) => Some(LexiconPrimitiveType::Bytes), 44 + "CidLink" => Some(LexiconPrimitiveType::CidLink), 45 + 46 + // Blob type 47 + "Blob" => Some(LexiconPrimitiveType::Blob), 48 + 49 + // Unknown/unvalidated data 50 + "Data" | "RawData" => Some(LexiconPrimitiveType::Unknown), 51 + "Vec" => { 52 + // Extract Vec<T> item type 53 + if let syn::PathArguments::AngleBracketed(args) = &last_segment.arguments { 54 + if let Some(syn::GenericArgument::Type(inner_ty)) = args.args.first() { 55 + return Some(LexiconPrimitiveType::Array(Box::new( 56 + rust_type_to_lexicon_type(inner_ty)?, 57 + ))); 58 + } 59 + } 60 + None 61 + } 62 + "Option" => { 63 + // Extract Option<T> inner type - mark as optional 64 + if let syn::PathArguments::AngleBracketed(args) = &last_segment.arguments { 65 + if let Some(syn::GenericArgument::Type(inner_ty)) = args.args.first() { 66 + return rust_type_to_lexicon_type(inner_ty); 67 + } 68 + } 69 + None 70 + } 71 + _ => None, 72 + } 73 + } 74 + _ => None, 75 + } 76 + } 77 + 78 + /// Check if a path represents bytes::Bytes 79 + fn is_bytes_type(path: &syn::Path) -> bool { 80 + if path.segments.len() == 2 { 81 + let first = &path.segments[0].ident; 82 + let second = &path.segments[1].ident; 83 + first == "bytes" && second == "Bytes" 84 + } else { 85 + false 86 + } 87 + } 88 + 89 + /// Classification of lexicon primitive types 90 + #[derive(Debug, Clone, PartialEq, Eq)] 91 + pub enum LexiconPrimitiveType { 92 + Boolean, 93 + Integer, 94 + String(StringFormat), 95 + Bytes, 96 + CidLink, 97 + Blob, 98 + Unknown, 99 + Array(Box<LexiconPrimitiveType>), 100 + Object, // For structs 101 + Ref(String), // For types with LexiconSchema impl 102 + Union(Vec<String>), // For enums with #[open_union] 103 + } 104 + 105 + #[derive(Debug, Clone, PartialEq, Eq)] 106 + pub enum StringFormat { 107 + Plain, 108 + Did, 109 + Handle, 110 + AtUri, 111 + Nsid, 112 + Cid, 113 + Datetime, 114 + Language, 115 + Tid, 116 + RecordKey, 117 + 
AtIdentifier,
    Uri,
}

/// Extract lexicon constraints from a field's `#[lexicon(...)]` attributes.
///
/// Attributes whose path is not `lexicon` are ignored. The recognized keys are the
/// integer-valued `max_length`, `max_graphemes`, `min_length`, `min_graphemes`,
/// `minimum` and `maximum`, and the string-valued `ref`.
///
/// Extraction is best-effort and never reports an error:
///
/// - Unrecognized keys are skipped together with their `= value` or parenthesized
///   arguments, so they do not prevent recognized keys that follow them in the same
///   attribute from being read.
/// - A recognized key whose value is not a literal of the expected kind is left unset.
/// - If an attribute cannot be parsed at all, parsing of that attribute stops and the
///   constraints collected so far are kept.
pub fn extract_field_constraints(attrs: &[syn::Attribute]) -> FieldConstraints {
    let mut constraints = FieldConstraints::default();

    for attr in attrs {
        if !attr.path().is_ident("lexicon") {
            continue;
        }

        // The result is intentionally discarded: this function has no error channel and
        // keeps whatever was successfully read before a malformed entry.
        let _ = attr.parse_nested_meta(|meta| {
            if meta.path.is_ident("max_length") {
                if let Some(value) = parse_int_value(&meta)? {
                    constraints.max_length = Some(value);
                }
            } else if meta.path.is_ident("max_graphemes") {
                if let Some(value) = parse_int_value(&meta)? {
                    constraints.max_graphemes = Some(value);
                }
            } else if meta.path.is_ident("min_length") {
                if let Some(value) = parse_int_value(&meta)? {
                    constraints.min_length = Some(value);
                }
            } else if meta.path.is_ident("min_graphemes") {
                if let Some(value) = parse_int_value(&meta)? {
                    constraints.min_graphemes = Some(value);
                }
            } else if meta.path.is_ident("minimum") {
                if let Some(value) = parse_int_value(&meta)? {
                    constraints.minimum = Some(value);
                }
            } else if meta.path.is_ident("maximum") {
                if let Some(value) = parse_int_value(&meta)? {
                    constraints.maximum = Some(value);
                }
            } else if meta.path.is_ident("ref") {
                if let Ok(lit) = meta.value()?.parse::<syn::LitStr>() {
                    constraints.explicit_ref = Some(lit.value());
                }
            } else {
                // Without this, the unconsumed `= value` / `(...)` of an unknown key makes
                // syn abort the whole attribute and every later constraint is lost.
                skip_unknown_meta(&meta)?;
            }
            Ok(())
        });
    }

    constraints
}

/// Read the `= <integer literal>` value of a nested meta item.
///
/// Returns `Ok(None)` when the value is present but is not an integer literal, and an
/// error when the `=` is missing or the literal does not fit in `N`.
fn parse_int_value<N>(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<Option<N>>
where
    N: std::str::FromStr,
    N::Err: std::fmt::Display,
{
    match meta.value()?.parse::<syn::LitInt>() {
        Ok(lit) => lit.base10_parse().map(Some),
        Err(_) => Ok(None),
    }
}

/// Consume the arguments of a nested meta item this module does not interpret.
///
/// Handles the `key = value` and `key(...)` forms; a bare `key` has nothing to consume.
fn skip_unknown_meta(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<()> {
    if meta.input.peek(syn::Token![=]) {
        let _ignored: syn::Expr = meta.value()?.parse()?;
    } else if meta.input.peek(syn::token::Paren) {
        let content;
        syn::parenthesized!(content in meta.input);
        // Drain every token inside the parentheses without interpreting it.
        content.step(|cursor| {
            let mut rest = *cursor;
            while let Some((_, next)) = rest.token_tree() {
                rest = next;
            }
            Ok(((), rest))
        })?;
    }
    Ok(())
}

/// Constraints collected from a field's `#[lexicon(...)]` attributes.
///
/// Every field is `None` unless [`extract_field_constraints`] found and parsed the
/// corresponding key.
#[derive(Debug, Default, Clone)]
pub struct FieldConstraints {
    /// Value of the `max_length` key.
    pub max_length: Option<usize>,
    /// Value of the `max_graphemes` key.
    pub max_graphemes: Option<usize>,
    /// Value of the `min_length` key.
    pub min_length: Option<usize>,
    /// Value of the `min_graphemes` key.
    pub min_graphemes: Option<usize>,
    /// Value of the `minimum` key.
    pub minimum: Option<i64>,
    /// Value of the `maximum` key.
    pub maximum: Option<i64>,
    /// Value of the `ref` key.
    pub explicit_ref: Option<String>,
}
+85
crates/jacquard-lexicon/tests/builder_tests.rs
··· 1 + use jacquard_lexicon::lexicon::LexStringFormat; 2 + use jacquard_lexicon::schema::builder::LexiconDocBuilder; 3 + 4 + #[test] 5 + fn test_builder_simple_record() { 6 + let doc = LexiconDocBuilder::new("app.example.test") 7 + .description("Test record") 8 + .record() 9 + .key("tid") 10 + .field("text", |f| f.string().max_length(1000).required().build()) 11 + .field("createdAt", |f| { 12 + f.string() 13 + .format(LexStringFormat::Datetime) 14 + .required() 15 + .build() 16 + }) 17 + .build() 18 + .build(); 19 + 20 + assert_eq!(doc.id.as_ref(), "app.example.test"); 21 + assert_eq!(doc.defs.len(), 1); 22 + 23 + // Serialize and verify 24 + let json = serde_json::to_string_pretty(&doc).unwrap(); 25 + println!("{}", json); 26 + 27 + assert!(json.contains("\"type\": \"record\"")); 28 + assert!(json.contains("\"maxLength\": 1000")); 29 + } 30 + 31 + #[test] 32 + fn test_builder_query() { 33 + let doc = LexiconDocBuilder::new("app.example.getPost") 34 + .description("Get a post") 35 + .query() 36 + .description("Retrieve a post by URI") 37 + .param_string("uri", true) 38 + .build() 39 + .build(); 40 + 41 + assert_eq!(doc.id.as_ref(), "app.example.getPost"); 42 + assert_eq!(doc.defs.len(), 1); 43 + 44 + let json = serde_json::to_string_pretty(&doc).unwrap(); 45 + println!("{}", json); 46 + 47 + assert!(json.contains("\"type\": \"query\"")); 48 + } 49 + 50 + #[test] 51 + fn test_builder_object_with_ref() { 52 + let doc = LexiconDocBuilder::new("app.example.types") 53 + .object("post") 54 + .field("uri", |f| { 55 + f.string().format(LexStringFormat::AtUri).required().build() 56 + }) 57 + .field("author", |f| f.ref_to("app.bsky.actor.defs#profileView")) 58 + .build() 59 + .build(); 60 + 61 + assert_eq!(doc.id.as_ref(), "app.example.types"); 62 + assert_eq!(doc.defs.len(), 1); 63 + 64 + let json = serde_json::to_string_pretty(&doc).unwrap(); 65 + println!("{}", json); 66 + 67 + assert!(json.contains("\"type\": \"ref\"")); 68 + 
assert!(json.contains("app.bsky.actor.defs#profileView")); 69 + } 70 + 71 + #[test] 72 + fn test_builder_array_field() { 73 + let doc = LexiconDocBuilder::new("app.example.list") 74 + .record() 75 + .field("items", |f| f.array(|a| a.string_items().max_length(100))) 76 + .build() 77 + .build(); 78 + 79 + assert_eq!(doc.id.as_ref(), "app.example.list"); 80 + 81 + let json = serde_json::to_string_pretty(&doc).unwrap(); 82 + println!("{}", json); 83 + 84 + assert!(json.contains("\"type\": \"array\"")); 85 + }
+137
crates/jacquard-lexicon/tests/schema_tests.rs
··· 1 + use jacquard_common::types::string::Datetime; 2 + use jacquard_common::CowStr; 3 + use jacquard_lexicon::lexicon::{ 4 + Lexicon, LexObject, LexObjectProperty, LexRecord, LexRecordRecord, LexString, 5 + LexStringFormat, LexUserType, LexiconDoc, 6 + }; 7 + use jacquard_lexicon::schema::{LexiconGenerator, LexiconSchema, ValidationError}; 8 + use std::collections::BTreeMap; 9 + 10 + // Simple test type 11 + #[derive(Debug, Clone)] 12 + struct SimpleRecord<'a> { 13 + text: CowStr<'a>, 14 + timestamp: Datetime, 15 + } 16 + 17 + impl LexiconSchema for SimpleRecord<'_> { 18 + fn nsid() -> &'static str { 19 + "com.example.simple" 20 + } 21 + 22 + fn lexicon_doc(_generator: &mut LexiconGenerator) -> LexiconDoc<'static> { 23 + let mut properties = BTreeMap::new(); 24 + 25 + properties.insert( 26 + "text".into(), 27 + LexObjectProperty::String(LexString { 28 + description: None, 29 + format: None, 30 + default: None, 31 + min_length: None, 32 + max_length: Some(1000), 33 + min_graphemes: None, 34 + max_graphemes: None, 35 + r#enum: None, 36 + r#const: None, 37 + known_values: None, 38 + }), 39 + ); 40 + 41 + properties.insert( 42 + "timestamp".into(), 43 + LexObjectProperty::String(LexString { 44 + description: None, 45 + format: Some(LexStringFormat::Datetime), 46 + default: None, 47 + min_length: None, 48 + max_length: None, 49 + min_graphemes: None, 50 + max_graphemes: None, 51 + r#enum: None, 52 + r#const: None, 53 + known_values: None, 54 + }), 55 + ); 56 + 57 + let record_obj = LexObject { 58 + description: None, 59 + required: Some(vec!["text".into(), "timestamp".into()]), 60 + nullable: None, 61 + properties, 62 + }; 63 + 64 + let record = LexRecord { 65 + description: Some("Simple record type".into()), 66 + key: Some("tid".into()), 67 + record: LexRecordRecord::Object(record_obj), 68 + }; 69 + 70 + let mut defs = BTreeMap::new(); 71 + defs.insert("main".into(), LexUserType::Record(record)); 72 + 73 + LexiconDoc { 74 + lexicon: Lexicon::Lexicon1, 75 + id: 
Self::nsid().into(), 76 + revision: None, 77 + description: Some("Test schema".into()), 78 + defs, 79 + } 80 + } 81 + 82 + fn validate(&self) -> Result<(), ValidationError> { 83 + // Check text length 84 + if self.text.len() > 1000 { 85 + return Err(ValidationError::MaxLength { 86 + field: "text", 87 + max: 1000, 88 + actual: self.text.len(), 89 + }); 90 + } 91 + 92 + Ok(()) 93 + } 94 + } 95 + 96 + #[test] 97 + fn test_manual_impl_generates_valid_schema() { 98 + let mut generator = LexiconGenerator::new(SimpleRecord::nsid()); 99 + let doc = SimpleRecord::lexicon_doc(&mut generator); 100 + 101 + // Verify structure 102 + assert_eq!(doc.id.as_ref(), "com.example.simple"); 103 + assert!(doc.defs.contains_key("main")); 104 + 105 + // Serialize to JSON 106 + let json = serde_json::to_string_pretty(&doc).expect("serialize"); 107 + println!("{}", json); 108 + 109 + // Should be valid lexicon JSON 110 + assert!(json.contains("\"lexicon\": 1")); 111 + assert!(json.contains("\"id\": \"com.example.simple\"")); 112 + } 113 + 114 + #[test] 115 + fn test_validation_works() { 116 + let record = SimpleRecord { 117 + text: "a".repeat(5000).into(), // Too long 118 + timestamp: Datetime::now(), 119 + }; 120 + 121 + let result = record.validate(); 122 + assert!(result.is_err()); 123 + 124 + let err = result.unwrap_err(); 125 + assert!(matches!(err, ValidationError::MaxLength { .. })); 126 + } 127 + 128 + #[test] 129 + fn test_validation_passes() { 130 + let record = SimpleRecord { 131 + text: "Hello, world!".into(), 132 + timestamp: Datetime::now(), 133 + }; 134 + 135 + let result = record.validate(); 136 + assert!(result.is_ok()); 137 + }