schema discovery · nonbinary.computer/jacquard@4eb26a0

nonbinary.computer / jacquard

fork atom

A better Rust ATProto crate

fork atom

schema discovery

Orual 4 months ago 4eb26a04 92bfd6b5

build.yml

failed 2min 13s

+284 -17

2 changed files

expand all

crates

jacquard-lexgen

src

schema_discovery.rs

jacquard-lexicon

src

derive_impl

doc_to_tokens.rs

+269 -10

crates/jacquard-lexgen/src/schema_discovery.rs

··· 9 9 //! use jacquard_lexgen::schema_discovery::WorkspaceDiscovery; 10 10 //! 11 11 //! fn main() -> miette::Result<()> { 12 - //! // Discover all schemas in workspace 13 - //! let schemas = WorkspaceDiscovery::new() 14 - //! .scan()?; 15 - //! 16 - //! println!("Found {} schemas", schemas.len()); 17 - //! 18 - //! for schema in schemas { 19 - //! println!(" {}: {}", schema.nsid, schema.source_path.display()); 20 - //! } 12 + //! // Discover and generate schemas 13 + //! WorkspaceDiscovery::new() 14 + //! .verbose(true) 15 + //! .generate_and_write("lexicons")?; 21 16 //! 22 17 //! Ok(()) 23 18 //! } 24 19 //! ``` 25 20 21 + use jacquard_lexicon::lexicon::LexiconDoc; 26 22 use miette::{IntoDiagnostic, Result}; 23 + use std::collections::BTreeMap; 27 24 use std::path::{Path, PathBuf}; 28 - use syn::{Attribute, Item}; 25 + use syn::{Attribute, DeriveInput, Item}; 29 26 30 27 /// Discovered schema type 31 28 #[derive(Debug, Clone)] ··· 59 56 pub key: Option<String>, 60 57 } 61 58 59 + /// Generated schema with full LexiconDoc 60 + #[derive(Debug, Clone)] 61 + pub struct GeneratedSchema { 62 + /// The NSID from the generated schema 63 + pub nsid: String, 64 + /// The schema_id (may include fragment) 65 + pub schema_id: String, 66 + /// The generated lexicon document 67 + pub doc: LexiconDoc<'static>, 68 + /// Source file containing this type 69 + pub source_path: PathBuf, 70 + } 71 + 62 72 /// Workspace schema discovery via source scanning 63 73 pub struct WorkspaceDiscovery { 64 74 workspace_root: PathBuf, ··· 118 128 Ok(schemas) 119 129 } 120 130 131 + /// Scan workspace and generate complete schemas 132 + pub fn scan_and_generate(&self) -> Result<Vec<GeneratedSchema>> { 133 + let discovered = self.scan()?; 134 + 135 + if self.verbose { 136 + println!("Generating schemas for {} types...", discovered.len()); 137 + } 138 + 139 + let mut generated = Vec::new(); 140 + 141 + for schema_info in discovered { 142 + if self.verbose { 143 + println!( 144 + "Generating schema for {}: {}", 145 + schema_info.type_name, schema_info.nsid 146 + ); 147 + } 148 + 149 + // Re-parse the source file to get full AST 150 + let contents = std::fs::read_to_string(&schema_info.source_path).into_diagnostic()?; 151 + let file = syn::parse_file(&contents).into_diagnostic()?; 152 + 153 + // Find the specific type 154 + let ast = self.find_type_in_file(&file, &schema_info.type_name)?; 155 + 156 + // Use schema builder based on kind 157 + let built = match schema_info.kind { 158 + SchemaKind::Struct => { 159 + jacquard_lexicon::schema::from_ast::build_struct_schema(&ast)? 160 + } 161 + SchemaKind::Enum => { 162 + jacquard_lexicon::schema::from_ast::build_enum_schema(&ast)? 163 + } 164 + }; 165 + 166 + generated.push(GeneratedSchema { 167 + nsid: built.nsid, 168 + schema_id: built.schema_id, 169 + doc: built.doc, 170 + source_path: schema_info.source_path.clone(), 171 + }); 172 + } 173 + 174 + if self.verbose { 175 + println!("Generated {} schemas", generated.len()); 176 + } 177 + 178 + Ok(generated) 179 + } 180 + 181 + /// Generate schemas and write to directory 182 + pub fn generate_and_write(&self, output_dir: impl AsRef<Path>) -> Result<()> { 183 + let schemas = self.scan_and_generate()?; 184 + 185 + if schemas.is_empty() { 186 + println!("No schemas found to generate"); 187 + return Ok(()); 188 + } 189 + 190 + // Group by base NSID (strip #fragments) 191 + let grouped = self.group_by_base_nsid(&schemas); 192 + 193 + // Create output directory 194 + std::fs::create_dir_all(output_dir.as_ref()).into_diagnostic()?; 195 + 196 + // Write each group 197 + let mut written = 0; 198 + for (base_nsid, group) in &grouped { 199 + self.write_lexicon_file(output_dir.as_ref(), base_nsid, group)?; 200 + written += 1; 201 + } 202 + 203 + println!( 204 + "✓ Wrote {} lexicon files to {}", 205 + written, 206 + output_dir.as_ref().display() 207 + ); 208 + 209 + Ok(()) 210 + } 211 + 212 + /// Group schemas by base NSID (strip fragment suffix) 213 + fn group_by_base_nsid(&self, schemas: &[GeneratedSchema]) -> BTreeMap<String, Vec<&GeneratedSchema>> { 214 + let mut groups: BTreeMap<String, Vec<&GeneratedSchema>> = BTreeMap::new(); 215 + 216 + for schema in schemas { 217 + // Split on # to get base NSID 218 + let base_nsid = if let Some(pos) = schema.nsid.find('#') { 219 + &schema.nsid[..pos] 220 + } else { 221 + &schema.nsid 222 + }; 223 + 224 + groups 225 + .entry(base_nsid.to_string()) 226 + .or_default() 227 + .push(schema); 228 + } 229 + 230 + groups 231 + } 232 + 233 + /// Write a single lexicon file 234 + fn write_lexicon_file( 235 + &self, 236 + output_dir: &Path, 237 + base_nsid: &str, 238 + schemas: &[&GeneratedSchema], 239 + ) -> Result<()> { 240 + use jacquard_lexicon::lexicon::Lexicon; 241 + 242 + // Merge all defs into one LexiconDoc 243 + let mut all_defs = BTreeMap::new(); 244 + let mut primary_doc: Option<LexiconDoc> = None; 245 + 246 + for schema in schemas { 247 + // Determine if this is the primary def or a fragment 248 + if schema.nsid.contains('#') { 249 + // Fragment - extract def name and add to defs 250 + let fragment_name = schema.nsid.split('#').nth(1).unwrap(); 251 + 252 + // Merge defs from fragment doc 253 + for (def_name, def) in &schema.doc.defs { 254 + // Use fragment name if def is "main", otherwise use as-is 255 + let final_name = if def_name == "main" { 256 + fragment_name.to_string() 257 + } else { 258 + def_name.to_string() 259 + }; 260 + all_defs.insert(final_name, def.clone()); 261 + } 262 + } else { 263 + // Primary type - use as base doc 264 + primary_doc = Some(schema.doc.clone()); 265 + } 266 + } 267 + 268 + // Build final doc 269 + let mut final_doc = primary_doc.unwrap_or_else(|| LexiconDoc { 270 + lexicon: Lexicon::Lexicon1, 271 + id: base_nsid.into(), 272 + revision: None, 273 + description: None, 274 + defs: BTreeMap::new(), 275 + }); 276 + 277 + // Merge in all defs 278 + for (k, v) in all_defs { 279 + final_doc.defs.insert(k.into(), v); 280 + } 281 + 282 + // Serialize to JSON with "main" def first 283 + let json = self.serialize_with_main_first(&final_doc)?; 284 + 285 + // Write to file 286 + let filename = base_nsid.replace('.', "_") + ".json"; 287 + let path = output_dir.join(&filename); 288 + 289 + std::fs::write(&path, json).into_diagnostic()?; 290 + 291 + if self.verbose { 292 + println!(" Wrote {} ({} defs)", filename, final_doc.defs.len()); 293 + } 294 + 295 + Ok(()) 296 + } 297 + 298 + /// Serialize a lexicon doc with "main" def first 299 + fn serialize_with_main_first(&self, doc: &LexiconDoc) -> Result<String> { 300 + use serde_json::{json, Map, Value}; 301 + 302 + // Build defs map with main first 303 + let mut defs_map = Map::new(); 304 + 305 + // Insert main first if it exists 306 + if let Some(main_def) = doc.defs.get("main") { 307 + let main_value = serde_json::to_value(main_def).into_diagnostic()?; 308 + defs_map.insert("main".to_string(), main_value); 309 + } 310 + 311 + // Insert all other defs in sorted order 312 + for (name, def) in &doc.defs { 313 + if name != "main" { 314 + let def_value = serde_json::to_value(def).into_diagnostic()?; 315 + defs_map.insert(name.to_string(), def_value); 316 + } 317 + } 318 + 319 + // Build final JSON object 320 + let mut obj = Map::new(); 321 + obj.insert("lexicon".to_string(), json!(1)); 322 + obj.insert("id".to_string(), json!(doc.id.as_ref())); 323 + 324 + if let Some(rev) = &doc.revision { 325 + obj.insert("revision".to_string(), json!(rev)); 326 + } 327 + 328 + if let Some(desc) = &doc.description { 329 + obj.insert("description".to_string(), json!(desc)); 330 + } 331 + 332 + obj.insert("defs".to_string(), Value::Object(defs_map)); 333 + 334 + // Pretty-print JSON 335 + serde_json::to_string_pretty(&Value::Object(obj)).into_diagnostic() 336 + } 337 + 121 338 /// Find workspace members by parsing Cargo.toml 122 339 fn find_workspace_members(&self) -> Result<Vec<PathBuf>> { 123 340 let cargo_toml = self.workspace_root.join("Cargo.toml"); ··· 218 435 } 219 436 220 437 Ok(schemas) 438 + } 439 + 440 + /// Find a type in a parsed file and convert to DeriveInput 441 + fn find_type_in_file(&self, file: &syn::File, type_name: &str) -> Result<DeriveInput> { 442 + for item in &file.items { 443 + match item { 444 + Item::Struct(item_struct) if item_struct.ident == type_name => { 445 + // Convert ItemStruct to DeriveInput 446 + return Ok(DeriveInput { 447 + attrs: item_struct.attrs.clone(), 448 + vis: item_struct.vis.clone(), 449 + ident: item_struct.ident.clone(), 450 + generics: item_struct.generics.clone(), 451 + data: syn::Data::Struct(syn::DataStruct { 452 + struct_token: item_struct.struct_token, 453 + fields: item_struct.fields.clone(), 454 + semi_token: item_struct.semi_token, 455 + }), 456 + }); 457 + } 458 + Item::Enum(item_enum) if item_enum.ident == type_name => { 459 + // Convert ItemEnum to DeriveInput 460 + return Ok(DeriveInput { 461 + attrs: item_enum.attrs.clone(), 462 + vis: item_enum.vis.clone(), 463 + ident: item_enum.ident.clone(), 464 + generics: item_enum.generics.clone(), 465 + data: syn::Data::Enum(syn::DataEnum { 466 + enum_token: item_enum.enum_token, 467 + brace_token: item_enum.brace_token, 468 + variants: item_enum.variants.clone(), 469 + }), 470 + }); 471 + } 472 + _ => continue, 473 + } 474 + } 475 + 476 + Err(miette::miette!( 477 + "Type {} not found in source file", 478 + type_name 479 + )) 221 480 } 222 481 223 482 /// Extract schema info from attributes

+15 -7

crates/jacquard-lexicon/src/derive_impl/doc_to_tokens.rs

··· 182 182 /// Convert LexObjectProperty to tokens 183 183 fn object_property_to_tokens(prop: &LexObjectProperty) -> TokenStream { 184 184 match prop { 185 - LexObjectProperty::Boolean(b) => quote! { 185 + LexObjectProperty::Boolean(_) => quote! { 186 186 ::jacquard_lexicon::lexicon::LexObjectProperty::Boolean( 187 187 ::jacquard_lexicon::lexicon::LexBoolean { 188 188 description: None, ··· 374 374 LexStringFormat::AtUri => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::AtUri }, 375 375 LexStringFormat::Nsid => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Nsid }, 376 376 LexStringFormat::Cid => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Cid }, 377 - LexStringFormat::Datetime => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Datetime }, 378 - LexStringFormat::Language => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Language }, 377 + LexStringFormat::Datetime => { 378 + quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Datetime } 379 + } 380 + LexStringFormat::Language => { 381 + quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Language } 382 + } 379 383 LexStringFormat::Tid => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Tid }, 380 - LexStringFormat::RecordKey => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::RecordKey }, 381 - LexStringFormat::AtIdentifier => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::AtIdentifier }, 384 + LexStringFormat::RecordKey => { 385 + quote! { ::jacquard_lexicon::lexicon::LexStringFormat::RecordKey } 386 + } 387 + LexStringFormat::AtIdentifier => { 388 + quote! { ::jacquard_lexicon::lexicon::LexStringFormat::AtIdentifier } 389 + } 382 390 LexStringFormat::Uri => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Uri }, 383 391 }); 384 392 let min_len = option_to_tokens(&s.min_length, |v| quote! { #v }); ··· 457 465 quote! { 458 466 ::jacquard_lexicon::lexicon::LexXrpcParametersProperty::String(#string_tokens) 459 467 } 460 - }, 468 + } 461 469 LexXrpcParametersProperty::Unknown(_) => quote! { 462 470 ::jacquard_lexicon::lexicon::LexXrpcParametersProperty::Unknown( 463 471 ::jacquard_lexicon::lexicon::LexUnknown { description: None } ··· 510 518 } 511 519 ) 512 520 } 513 - }, 521 + } 514 522 } 515 523 } 516 524