tangled
alpha
login
or
join now
nonbinary.computer
/
jacquard
80
fork
atom
A better Rust ATProto crate
80
fork
atom
overview
issues
9
pulls
pipelines
schema discovery
Orual
4 months ago
4eb26a04
92bfd6b5
0/1
build.yml
failed
2min 13s
+284
-17
2 changed files
expand all
collapse all
unified
split
crates
jacquard-lexgen
src
schema_discovery.rs
jacquard-lexicon
src
derive_impl
doc_to_tokens.rs
+269
-10
crates/jacquard-lexgen/src/schema_discovery.rs
···
9
9
//! use jacquard_lexgen::schema_discovery::WorkspaceDiscovery;
10
10
//!
11
11
//! fn main() -> miette::Result<()> {
12
12
-
//! // Discover all schemas in workspace
13
13
-
//! let schemas = WorkspaceDiscovery::new()
14
14
-
//! .scan()?;
15
15
-
//!
16
16
-
//! println!("Found {} schemas", schemas.len());
17
17
-
//!
18
18
-
//! for schema in schemas {
19
19
-
//! println!(" {}: {}", schema.nsid, schema.source_path.display());
20
20
-
//! }
12
12
+
//! // Discover and generate schemas
13
13
+
//! WorkspaceDiscovery::new()
14
14
+
//! .verbose(true)
15
15
+
//! .generate_and_write("lexicons")?;
21
16
//!
22
17
//! Ok(())
23
18
//! }
24
19
//! ```
25
20
21
21
+
use jacquard_lexicon::lexicon::LexiconDoc;
26
22
use miette::{IntoDiagnostic, Result};
23
23
+
use std::collections::BTreeMap;
27
24
use std::path::{Path, PathBuf};
28
28
-
use syn::{Attribute, Item};
25
25
+
use syn::{Attribute, DeriveInput, Item};
29
26
30
27
/// Discovered schema type
31
28
#[derive(Debug, Clone)]
···
59
56
pub key: Option<String>,
60
57
}
61
58
59
59
+
/// Generated schema with full LexiconDoc
60
60
+
#[derive(Debug, Clone)]
61
61
+
pub struct GeneratedSchema {
62
62
+
/// The NSID from the generated schema
63
63
+
pub nsid: String,
64
64
+
/// The schema_id (may include fragment)
65
65
+
pub schema_id: String,
66
66
+
/// The generated lexicon document
67
67
+
pub doc: LexiconDoc<'static>,
68
68
+
/// Source file containing this type
69
69
+
pub source_path: PathBuf,
70
70
+
}
71
71
+
62
72
/// Workspace schema discovery via source scanning
63
73
pub struct WorkspaceDiscovery {
64
74
workspace_root: PathBuf,
···
118
128
Ok(schemas)
119
129
}
120
130
131
131
+
/// Scan workspace and generate complete schemas
132
132
+
pub fn scan_and_generate(&self) -> Result<Vec<GeneratedSchema>> {
133
133
+
let discovered = self.scan()?;
134
134
+
135
135
+
if self.verbose {
136
136
+
println!("Generating schemas for {} types...", discovered.len());
137
137
+
}
138
138
+
139
139
+
let mut generated = Vec::new();
140
140
+
141
141
+
for schema_info in discovered {
142
142
+
if self.verbose {
143
143
+
println!(
144
144
+
"Generating schema for {}: {}",
145
145
+
schema_info.type_name, schema_info.nsid
146
146
+
);
147
147
+
}
148
148
+
149
149
+
// Re-parse the source file to get full AST
150
150
+
let contents = std::fs::read_to_string(&schema_info.source_path).into_diagnostic()?;
151
151
+
let file = syn::parse_file(&contents).into_diagnostic()?;
152
152
+
153
153
+
// Find the specific type
154
154
+
let ast = self.find_type_in_file(&file, &schema_info.type_name)?;
155
155
+
156
156
+
// Use schema builder based on kind
157
157
+
let built = match schema_info.kind {
158
158
+
SchemaKind::Struct => {
159
159
+
jacquard_lexicon::schema::from_ast::build_struct_schema(&ast)?
160
160
+
}
161
161
+
SchemaKind::Enum => {
162
162
+
jacquard_lexicon::schema::from_ast::build_enum_schema(&ast)?
163
163
+
}
164
164
+
};
165
165
+
166
166
+
generated.push(GeneratedSchema {
167
167
+
nsid: built.nsid,
168
168
+
schema_id: built.schema_id,
169
169
+
doc: built.doc,
170
170
+
source_path: schema_info.source_path.clone(),
171
171
+
});
172
172
+
}
173
173
+
174
174
+
if self.verbose {
175
175
+
println!("Generated {} schemas", generated.len());
176
176
+
}
177
177
+
178
178
+
Ok(generated)
179
179
+
}
180
180
+
181
181
+
/// Generate schemas and write to directory
182
182
+
pub fn generate_and_write(&self, output_dir: impl AsRef<Path>) -> Result<()> {
183
183
+
let schemas = self.scan_and_generate()?;
184
184
+
185
185
+
if schemas.is_empty() {
186
186
+
println!("No schemas found to generate");
187
187
+
return Ok(());
188
188
+
}
189
189
+
190
190
+
// Group by base NSID (strip #fragments)
191
191
+
let grouped = self.group_by_base_nsid(&schemas);
192
192
+
193
193
+
// Create output directory
194
194
+
std::fs::create_dir_all(output_dir.as_ref()).into_diagnostic()?;
195
195
+
196
196
+
// Write each group
197
197
+
let mut written = 0;
198
198
+
for (base_nsid, group) in &grouped {
199
199
+
self.write_lexicon_file(output_dir.as_ref(), base_nsid, group)?;
200
200
+
written += 1;
201
201
+
}
202
202
+
203
203
+
println!(
204
204
+
"✓ Wrote {} lexicon files to {}",
205
205
+
written,
206
206
+
output_dir.as_ref().display()
207
207
+
);
208
208
+
209
209
+
Ok(())
210
210
+
}
211
211
+
212
212
+
/// Group schemas by base NSID (strip fragment suffix)
213
213
+
fn group_by_base_nsid(&self, schemas: &[GeneratedSchema]) -> BTreeMap<String, Vec<&GeneratedSchema>> {
214
214
+
let mut groups: BTreeMap<String, Vec<&GeneratedSchema>> = BTreeMap::new();
215
215
+
216
216
+
for schema in schemas {
217
217
+
// Split on # to get base NSID
218
218
+
let base_nsid = if let Some(pos) = schema.nsid.find('#') {
219
219
+
&schema.nsid[..pos]
220
220
+
} else {
221
221
+
&schema.nsid
222
222
+
};
223
223
+
224
224
+
groups
225
225
+
.entry(base_nsid.to_string())
226
226
+
.or_default()
227
227
+
.push(schema);
228
228
+
}
229
229
+
230
230
+
groups
231
231
+
}
232
232
+
233
233
+
/// Write a single lexicon file
234
234
+
fn write_lexicon_file(
235
235
+
&self,
236
236
+
output_dir: &Path,
237
237
+
base_nsid: &str,
238
238
+
schemas: &[&GeneratedSchema],
239
239
+
) -> Result<()> {
240
240
+
use jacquard_lexicon::lexicon::Lexicon;
241
241
+
242
242
+
// Merge all defs into one LexiconDoc
243
243
+
let mut all_defs = BTreeMap::new();
244
244
+
let mut primary_doc: Option<LexiconDoc> = None;
245
245
+
246
246
+
for schema in schemas {
247
247
+
// Determine if this is the primary def or a fragment
248
248
+
if schema.nsid.contains('#') {
249
249
+
// Fragment - extract def name and add to defs
250
250
+
let fragment_name = schema.nsid.split('#').nth(1).unwrap();
251
251
+
252
252
+
// Merge defs from fragment doc
253
253
+
for (def_name, def) in &schema.doc.defs {
254
254
+
// Use fragment name if def is "main", otherwise use as-is
255
255
+
let final_name = if def_name == "main" {
256
256
+
fragment_name.to_string()
257
257
+
} else {
258
258
+
def_name.to_string()
259
259
+
};
260
260
+
all_defs.insert(final_name, def.clone());
261
261
+
}
262
262
+
} else {
263
263
+
// Primary type - use as base doc
264
264
+
primary_doc = Some(schema.doc.clone());
265
265
+
}
266
266
+
}
267
267
+
268
268
+
// Build final doc
269
269
+
let mut final_doc = primary_doc.unwrap_or_else(|| LexiconDoc {
270
270
+
lexicon: Lexicon::Lexicon1,
271
271
+
id: base_nsid.into(),
272
272
+
revision: None,
273
273
+
description: None,
274
274
+
defs: BTreeMap::new(),
275
275
+
});
276
276
+
277
277
+
// Merge in all defs
278
278
+
for (k, v) in all_defs {
279
279
+
final_doc.defs.insert(k.into(), v);
280
280
+
}
281
281
+
282
282
+
// Serialize to JSON with "main" def first
283
283
+
let json = self.serialize_with_main_first(&final_doc)?;
284
284
+
285
285
+
// Write to file
286
286
+
let filename = base_nsid.replace('.', "_") + ".json";
287
287
+
let path = output_dir.join(&filename);
288
288
+
289
289
+
std::fs::write(&path, json).into_diagnostic()?;
290
290
+
291
291
+
if self.verbose {
292
292
+
println!(" Wrote {} ({} defs)", filename, final_doc.defs.len());
293
293
+
}
294
294
+
295
295
+
Ok(())
296
296
+
}
297
297
+
298
298
+
/// Serialize a lexicon doc with "main" def first
299
299
+
fn serialize_with_main_first(&self, doc: &LexiconDoc) -> Result<String> {
300
300
+
use serde_json::{json, Map, Value};
301
301
+
302
302
+
// Build defs map with main first
303
303
+
let mut defs_map = Map::new();
304
304
+
305
305
+
// Insert main first if it exists
306
306
+
if let Some(main_def) = doc.defs.get("main") {
307
307
+
let main_value = serde_json::to_value(main_def).into_diagnostic()?;
308
308
+
defs_map.insert("main".to_string(), main_value);
309
309
+
}
310
310
+
311
311
+
// Insert all other defs in sorted order
312
312
+
for (name, def) in &doc.defs {
313
313
+
if name != "main" {
314
314
+
let def_value = serde_json::to_value(def).into_diagnostic()?;
315
315
+
defs_map.insert(name.to_string(), def_value);
316
316
+
}
317
317
+
}
318
318
+
319
319
+
// Build final JSON object
320
320
+
let mut obj = Map::new();
321
321
+
obj.insert("lexicon".to_string(), json!(1));
322
322
+
obj.insert("id".to_string(), json!(doc.id.as_ref()));
323
323
+
324
324
+
if let Some(rev) = &doc.revision {
325
325
+
obj.insert("revision".to_string(), json!(rev));
326
326
+
}
327
327
+
328
328
+
if let Some(desc) = &doc.description {
329
329
+
obj.insert("description".to_string(), json!(desc));
330
330
+
}
331
331
+
332
332
+
obj.insert("defs".to_string(), Value::Object(defs_map));
333
333
+
334
334
+
// Pretty-print JSON
335
335
+
serde_json::to_string_pretty(&Value::Object(obj)).into_diagnostic()
336
336
+
}
337
337
+
121
338
/// Find workspace members by parsing Cargo.toml
122
339
fn find_workspace_members(&self) -> Result<Vec<PathBuf>> {
123
340
let cargo_toml = self.workspace_root.join("Cargo.toml");
···
218
435
}
219
436
220
437
Ok(schemas)
438
438
+
}
439
439
+
440
440
+
/// Find a type in a parsed file and convert to DeriveInput
441
441
+
fn find_type_in_file(&self, file: &syn::File, type_name: &str) -> Result<DeriveInput> {
442
442
+
for item in &file.items {
443
443
+
match item {
444
444
+
Item::Struct(item_struct) if item_struct.ident == type_name => {
445
445
+
// Convert ItemStruct to DeriveInput
446
446
+
return Ok(DeriveInput {
447
447
+
attrs: item_struct.attrs.clone(),
448
448
+
vis: item_struct.vis.clone(),
449
449
+
ident: item_struct.ident.clone(),
450
450
+
generics: item_struct.generics.clone(),
451
451
+
data: syn::Data::Struct(syn::DataStruct {
452
452
+
struct_token: item_struct.struct_token,
453
453
+
fields: item_struct.fields.clone(),
454
454
+
semi_token: item_struct.semi_token,
455
455
+
}),
456
456
+
});
457
457
+
}
458
458
+
Item::Enum(item_enum) if item_enum.ident == type_name => {
459
459
+
// Convert ItemEnum to DeriveInput
460
460
+
return Ok(DeriveInput {
461
461
+
attrs: item_enum.attrs.clone(),
462
462
+
vis: item_enum.vis.clone(),
463
463
+
ident: item_enum.ident.clone(),
464
464
+
generics: item_enum.generics.clone(),
465
465
+
data: syn::Data::Enum(syn::DataEnum {
466
466
+
enum_token: item_enum.enum_token,
467
467
+
brace_token: item_enum.brace_token,
468
468
+
variants: item_enum.variants.clone(),
469
469
+
}),
470
470
+
});
471
471
+
}
472
472
+
_ => continue,
473
473
+
}
474
474
+
}
475
475
+
476
476
+
Err(miette::miette!(
477
477
+
"Type {} not found in source file",
478
478
+
type_name
479
479
+
))
221
480
}
222
481
223
482
/// Extract schema info from attributes
+15
-7
crates/jacquard-lexicon/src/derive_impl/doc_to_tokens.rs
···
182
182
/// Convert LexObjectProperty to tokens
183
183
fn object_property_to_tokens(prop: &LexObjectProperty) -> TokenStream {
184
184
match prop {
185
185
-
LexObjectProperty::Boolean(b) => quote! {
185
185
+
LexObjectProperty::Boolean(_) => quote! {
186
186
::jacquard_lexicon::lexicon::LexObjectProperty::Boolean(
187
187
::jacquard_lexicon::lexicon::LexBoolean {
188
188
description: None,
···
374
374
LexStringFormat::AtUri => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::AtUri },
375
375
LexStringFormat::Nsid => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Nsid },
376
376
LexStringFormat::Cid => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Cid },
377
377
-
LexStringFormat::Datetime => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Datetime },
378
378
-
LexStringFormat::Language => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Language },
377
377
+
LexStringFormat::Datetime => {
378
378
+
quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Datetime }
379
379
+
}
380
380
+
LexStringFormat::Language => {
381
381
+
quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Language }
382
382
+
}
379
383
LexStringFormat::Tid => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Tid },
380
380
-
LexStringFormat::RecordKey => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::RecordKey },
381
381
-
LexStringFormat::AtIdentifier => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::AtIdentifier },
384
384
+
LexStringFormat::RecordKey => {
385
385
+
quote! { ::jacquard_lexicon::lexicon::LexStringFormat::RecordKey }
386
386
+
}
387
387
+
LexStringFormat::AtIdentifier => {
388
388
+
quote! { ::jacquard_lexicon::lexicon::LexStringFormat::AtIdentifier }
389
389
+
}
382
390
LexStringFormat::Uri => quote! { ::jacquard_lexicon::lexicon::LexStringFormat::Uri },
383
391
});
384
392
let min_len = option_to_tokens(&s.min_length, |v| quote! { #v });
···
457
465
quote! {
458
466
::jacquard_lexicon::lexicon::LexXrpcParametersProperty::String(#string_tokens)
459
467
}
460
460
-
},
468
468
+
}
461
469
LexXrpcParametersProperty::Unknown(_) => quote! {
462
470
::jacquard_lexicon::lexicon::LexXrpcParametersProperty::Unknown(
463
471
::jacquard_lexicon::lexicon::LexUnknown { description: None }
···
510
518
}
511
519
)
512
520
}
513
513
-
},
521
521
+
}
514
522
}
515
523
}
516
524