tangled
alpha
login
or
join now
nonbinary.computer
/
jacquard
80
fork
atom
A better Rust ATProto crate
80
fork
atom
overview
issues
9
pulls
pipelines
codegen cleanup part 1
Orual
4 months ago
03932ab0
6f0e2f92
+385
-259
10 changed files
expand all
collapse all
unified
split
crates
jacquard-lexicon
src
codegen
lifetime.rs
names.rs
nsid_utils.rs
output.rs
structs.rs
types.rs
xrpc.rs
codegen.rs
corpus.rs
validation.rs
+1
crates/jacquard-lexicon/src/codegen.rs
···
6
6
7
7
pub mod lifetime;
8
8
pub mod names;
9
9
+
pub mod nsid_utils;
9
10
pub mod output;
10
11
pub mod schema_impl;
11
12
pub mod structs;
+91
-56
crates/jacquard-lexicon/src/codegen/lifetime.rs
···
1
1
use super::CodeGenerator;
2
2
-
use crate::lexicon::{LexArrayItem, LexObjectProperty, LexString, LexStringFormat, LexUserType};
2
2
+
use crate::lexicon::{
3
3
+
LexArrayItem, LexObjectProperty, LexPrimitiveArrayItem, LexString, LexStringFormat,
4
4
+
LexUserType, LexXrpcParametersProperty,
5
5
+
};
6
6
+
7
7
+
/// Trait for lexicon types that can determine lifetime requirements
8
8
+
trait HasLifetime {
9
9
+
/// Check if this type needs a lifetime parameter when generated
10
10
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool;
11
11
+
}
3
12
4
4
-
impl<'c> CodeGenerator<'c> {
5
5
-
/// Check if a property type needs a lifetime parameter
6
6
-
pub(super) fn property_needs_lifetime(&self, prop: &LexObjectProperty<'static>) -> bool {
7
7
-
match prop {
13
13
+
impl HasLifetime for LexObjectProperty<'_> {
14
14
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
15
15
+
match self {
8
16
LexObjectProperty::Boolean(_) | LexObjectProperty::Integer(_) => false,
9
9
-
LexObjectProperty::String(s) => self.string_needs_lifetime(s),
17
17
+
LexObjectProperty::String(s) => s.needs_lifetime(generator),
10
18
LexObjectProperty::Bytes(_) => false, // Bytes is owned
11
19
LexObjectProperty::CidLink(_)
12
20
| LexObjectProperty::Blob(_)
13
21
| LexObjectProperty::Unknown(_) => true,
14
14
-
LexObjectProperty::Array(array) => self.array_item_needs_lifetime(&array.items),
22
22
+
LexObjectProperty::Array(array) => array.items.needs_lifetime(generator),
15
23
LexObjectProperty::Object(_) => true, // Nested objects have lifetimes
16
16
-
LexObjectProperty::Ref(ref_type) => {
17
17
-
// Check if the ref target actually needs a lifetime
18
18
-
self.ref_needs_lifetime(&ref_type.r#ref)
19
19
-
}
24
24
+
LexObjectProperty::Ref(ref_type) => generator.ref_needs_lifetime(&ref_type.r#ref),
20
25
LexObjectProperty::Union(_) => true, // Unions generally have lifetimes
21
26
}
22
27
}
28
28
+
}
23
29
24
24
-
/// Check if an array item type needs a lifetime parameter
25
25
-
pub(super) fn array_item_needs_lifetime(&self, item: &LexArrayItem) -> bool {
26
26
-
match item {
30
30
+
impl HasLifetime for LexArrayItem<'_> {
31
31
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
32
32
+
match self {
27
33
LexArrayItem::Boolean(_) | LexArrayItem::Integer(_) => false,
28
28
-
LexArrayItem::String(s) => self.string_needs_lifetime(s),
34
34
+
LexArrayItem::String(s) => s.needs_lifetime(generator),
29
35
LexArrayItem::Bytes(_) => false,
30
36
LexArrayItem::CidLink(_) | LexArrayItem::Blob(_) | LexArrayItem::Unknown(_) => true,
31
37
LexArrayItem::Object(_) => true, // Nested objects have lifetimes
32
32
-
LexArrayItem::Ref(ref_type) => self.ref_needs_lifetime(&ref_type.r#ref),
38
38
+
LexArrayItem::Ref(ref_type) => generator.ref_needs_lifetime(&ref_type.r#ref),
33
39
LexArrayItem::Union(_) => true,
34
40
}
35
41
}
42
42
+
}
36
43
37
37
-
/// Check if a string type needs a lifetime parameter
38
38
-
pub(super) fn string_needs_lifetime(&self, s: &LexString) -> bool {
39
39
-
match s.format {
44
44
+
impl HasLifetime for LexString<'_> {
45
45
+
fn needs_lifetime(&self, _generator: &CodeGenerator) -> bool {
46
46
+
match self.format {
40
47
Some(LexStringFormat::Datetime)
41
48
| Some(LexStringFormat::Language)
42
49
| Some(LexStringFormat::Tid) => false,
43
50
_ => true, // Most string types borrow
44
51
}
45
52
}
53
53
+
}
46
54
47
47
-
/// Check if a ref needs a lifetime parameter
48
48
-
pub(super) fn ref_needs_lifetime(&self, ref_str: &str) -> bool {
49
49
-
// Try to resolve the ref
50
50
-
if let Some((_doc, def)) = self.corpus.resolve_ref(ref_str) {
51
51
-
self.def_needs_lifetime(def)
52
52
-
} else {
53
53
-
// If we can't resolve it, assume it needs a lifetime (safe default)
54
54
-
true
55
55
-
}
56
56
-
}
57
57
-
58
58
-
/// Check if a lexicon def needs a lifetime parameter
59
59
-
pub(super) fn def_needs_lifetime(&self, def: &LexUserType<'static>) -> bool {
60
60
-
match def {
55
55
+
impl HasLifetime for LexUserType<'_> {
56
56
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
57
57
+
match self {
61
58
LexUserType::Record(_) => true,
62
59
LexUserType::Object(_) => true,
63
60
LexUserType::Token(_) => false,
···
67
64
// Known values enums have Other(CowStr<'a>) variant
68
65
true
69
66
} else {
70
70
-
self.string_needs_lifetime(s)
67
67
+
s.needs_lifetime(generator)
71
68
}
72
69
}
73
70
LexUserType::Integer(_) => false,
74
71
LexUserType::Boolean(_) => false,
75
72
LexUserType::Bytes(_) => false,
76
73
LexUserType::CidLink(_) | LexUserType::Blob(_) | LexUserType::Unknown(_) => true,
77
77
-
LexUserType::Array(array) => self.array_item_needs_lifetime(&array.items),
74
74
+
LexUserType::Array(array) => array.items.needs_lifetime(generator),
78
75
LexUserType::XrpcQuery(_)
79
76
| LexUserType::XrpcProcedure(_)
80
77
| LexUserType::XrpcSubscription(_) => {
···
85
82
LexUserType::Union(_) => false, // Unions are just refs, no lifetime needed
86
83
}
87
84
}
85
85
+
}
86
86
+
87
87
+
impl HasLifetime for LexXrpcParametersProperty<'_> {
88
88
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
89
89
+
match self {
90
90
+
LexXrpcParametersProperty::Boolean(_) | LexXrpcParametersProperty::Integer(_) => false,
91
91
+
LexXrpcParametersProperty::String(s) => s.needs_lifetime(generator),
92
92
+
LexXrpcParametersProperty::Unknown(_) => true,
93
93
+
LexXrpcParametersProperty::Array(arr) => arr.items.needs_lifetime(generator),
94
94
+
}
95
95
+
}
96
96
+
}
97
97
+
98
98
+
impl HasLifetime for LexPrimitiveArrayItem<'_> {
99
99
+
fn needs_lifetime(&self, generator: &CodeGenerator) -> bool {
100
100
+
match self {
101
101
+
LexPrimitiveArrayItem::Boolean(_) | LexPrimitiveArrayItem::Integer(_) => false,
102
102
+
LexPrimitiveArrayItem::String(s) => s.needs_lifetime(generator),
103
103
+
LexPrimitiveArrayItem::Unknown(_) => true,
104
104
+
}
105
105
+
}
106
106
+
}
107
107
+
108
108
+
impl<'c> CodeGenerator<'c> {
109
109
+
/// Check if a property type needs a lifetime parameter
110
110
+
pub(super) fn property_needs_lifetime(&self, prop: &LexObjectProperty<'_>) -> bool {
111
111
+
prop.needs_lifetime(self)
112
112
+
}
113
113
+
114
114
+
/// Check if an array item type needs a lifetime parameter
115
115
+
pub(super) fn array_item_needs_lifetime(&self, item: &LexArrayItem<'_>) -> bool {
116
116
+
item.needs_lifetime(self)
117
117
+
}
118
118
+
119
119
+
/// Check if a string type needs a lifetime parameter
120
120
+
pub(super) fn string_needs_lifetime(&self, s: &LexString<'_>) -> bool {
121
121
+
s.needs_lifetime(self)
122
122
+
}
123
123
+
124
124
+
/// Check if a ref needs a lifetime parameter
125
125
+
pub(super) fn ref_needs_lifetime(&self, ref_str: &str) -> bool {
126
126
+
// Try to resolve the ref
127
127
+
if let Some((_doc, def)) = self.corpus.resolve_ref(ref_str) {
128
128
+
def.needs_lifetime(self)
129
129
+
} else {
130
130
+
// If we can't resolve it, assume it needs a lifetime (safe default)
131
131
+
true
132
132
+
}
133
133
+
}
134
134
+
135
135
+
/// Check if a lexicon def needs a lifetime parameter
136
136
+
pub(super) fn def_needs_lifetime(&self, def: &LexUserType<'_>) -> bool {
137
137
+
def.needs_lifetime(self)
138
138
+
}
88
139
89
140
/// Check if xrpc params need a lifetime parameter
90
141
pub(super) fn params_need_lifetime(
91
142
&self,
92
92
-
params: &crate::lexicon::LexXrpcParameters<'static>,
143
143
+
params: &crate::lexicon::LexXrpcParameters<'_>,
93
144
) -> bool {
94
94
-
params.properties.values().any(|prop| {
95
95
-
use crate::lexicon::LexXrpcParametersProperty;
96
96
-
match prop {
97
97
-
LexXrpcParametersProperty::Boolean(_) | LexXrpcParametersProperty::Integer(_) => {
98
98
-
false
99
99
-
}
100
100
-
LexXrpcParametersProperty::String(s) => self.string_needs_lifetime(s),
101
101
-
LexXrpcParametersProperty::Unknown(_) => true,
102
102
-
LexXrpcParametersProperty::Array(arr) => {
103
103
-
use crate::lexicon::LexPrimitiveArrayItem;
104
104
-
match &arr.items {
105
105
-
LexPrimitiveArrayItem::Boolean(_) | LexPrimitiveArrayItem::Integer(_) => {
106
106
-
false
107
107
-
}
108
108
-
LexPrimitiveArrayItem::String(s) => self.string_needs_lifetime(s),
109
109
-
LexPrimitiveArrayItem::Unknown(_) => true,
110
110
-
}
111
111
-
}
112
112
-
}
113
113
-
})
145
145
+
params
146
146
+
.properties
147
147
+
.values()
148
148
+
.any(|prop| prop.needs_lifetime(self))
114
149
}
115
150
}
+9
-5
crates/jacquard-lexicon/src/codegen/names.rs
···
1
1
+
use super::nsid_utils::NsidPath;
1
2
use super::utils::sanitize_name;
2
3
use super::CodeGenerator;
3
4
use heck::{ToPascalCase, ToSnakeCase};
···
66
67
fn def_to_base_type_name(&self, nsid: &str, def_name: &str) -> String {
67
68
if def_name == "main" {
68
69
// Use last segment of NSID
69
69
-
let base_name = nsid.split('.').last().unwrap().to_pascal_case();
70
70
+
let nsid_path = NsidPath::parse(nsid);
71
71
+
let base_name = nsid_path.last_segment().to_pascal_case();
70
72
71
73
// Check if any other def would collide with this name
72
74
if let Some(doc) = self.corpus.get(nsid) {
···
101
103
// Add contextual prefix to avoid collision
102
104
if def_name == "main" {
103
105
// Use second-to-last NSID segment for main defs
104
104
-
let parts: Vec<_> = nsid.split('.').collect();
106
106
+
let nsid_path = NsidPath::parse(nsid);
107
107
+
let parts = nsid_path.segments();
105
108
if parts.len() >= 2 {
106
109
format!("{}{}", parts[parts.len() - 2].to_pascal_case(), base_name)
107
110
} else {
···
125
128
/// - `app.bsky.feed.post` → `app_bsky/feed/post.rs`
126
129
/// - `com.atproto.label.defs` → `com_atproto/label.rs` (defs go in parent)
127
130
pub(super) fn nsid_to_file_path(&self, nsid: &str) -> std::path::PathBuf {
128
128
-
let parts: Vec<&str> = nsid.split('.').collect();
131
131
+
let nsid_path = NsidPath::parse(nsid);
132
132
+
let parts = nsid_path.segments();
129
133
130
134
if parts.len() < 2 {
131
135
// Shouldn't happen with valid NSIDs, but handle gracefully
132
136
return format!("{}.rs", sanitize_name(parts[0])).into();
133
137
}
134
138
135
135
-
let last = parts.last().unwrap();
139
139
+
let last = nsid_path.last_segment();
136
140
137
137
-
if *last == "defs" && parts.len() >= 3 {
141
141
+
if nsid_path.is_defs() && parts.len() >= 3 {
138
142
// defs go in parent module: com.atproto.label.defs → com_atproto/label.rs
139
143
let first_two = format!("{}_{}", sanitize_name(parts[0]), sanitize_name(parts[1]));
140
144
if parts.len() == 3 {
+189
crates/jacquard-lexicon/src/codegen/nsid_utils.rs
···
1
1
+
//! Utilities for parsing and working with NSIDs and refs
2
2
+
3
3
+
/// Parsed NSID components for easier manipulation.
///
/// The value borrows from the string handed to [`NsidPath::parse`]. No
/// validation is performed: the input is only split on `'.'`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NsidPath<'a> {
    /// The original, unsplit NSID string.
    nsid: &'a str,
    /// The pieces of `nsid` between `'.'` separators, in order.
    segments: Vec<&'a str>,
}

impl<'a> NsidPath<'a> {
    /// Parse an NSID into its component segments.
    ///
    /// `str::split` always yields at least one item, so even an empty input
    /// produces exactly one (empty) segment.
    pub fn parse(nsid: &'a str) -> Self {
        Self {
            nsid,
            segments: nsid.split('.').collect(),
        }
    }

    /// Get the namespace (first two segments joined with '.').
    ///
    /// Returns "com.atproto" from "com.atproto.repo.strongRef". When there are
    /// fewer than two segments the whole input string is returned instead.
    pub fn namespace(&self) -> String {
        match self.segments.as_slice() {
            [first, second, ..] => format!("{}.{}", first, second),
            _ => self.nsid.to_string(),
        }
    }

    /// Get the last segment of the NSID.
    ///
    /// The returned slice borrows from the original input (`'a`), not from
    /// this `NsidPath`, so it stays usable after the path is dropped.
    pub fn last_segment(&self) -> &'a str {
        self.segments.last().copied().unwrap_or(self.nsid)
    }

    /// Get all segments except the last.
    pub fn parent_segments(&self) -> &[&'a str] {
        match self.segments.split_last() {
            Some((_last, parents)) => parents,
            // Not reachable via `parse` (it always stores at least one
            // segment); kept so the method is total.
            None => &[],
        }
    }

    /// Check if this is a "defs" NSID (ends with "defs").
    pub fn is_defs(&self) -> bool {
        self.last_segment() == "defs"
    }

    /// Get all segments.
    pub fn segments(&self) -> &[&'a str] {
        &self.segments
    }

    /// Get the original NSID string.
    ///
    /// Like [`NsidPath::last_segment`], the result borrows from the original
    /// input rather than from `self`.
    pub fn as_str(&self) -> &'a str {
        self.nsid
    }

    /// Get number of segments.
    pub fn len(&self) -> usize {
        self.segments.len()
    }

    /// Check if there are no segments.
    ///
    /// For any value built with [`NsidPath::parse`] this is `false`, because
    /// splitting always yields at least one segment (an empty input gives a
    /// single empty segment). It exists to pair with [`NsidPath::len`].
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
}
66
66
+
67
67
+
/// Parsed reference with NSID and optional fragment.
///
/// Both parts borrow from the strings handed to [`RefPath::parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RefPath<'a> {
    /// NSID portion of the ref (empty for a local ref parsed without context).
    nsid: &'a str,
    /// Def name; `"main"` when the ref carried no fragment.
    def: &'a str,
}

impl<'a> RefPath<'a> {
    /// Parse a reference string, normalizing it based on current NSID context.
    ///
    /// Three shapes are recognised, split at the first `'#'`:
    /// - `#def`: local ref; the NSID comes from `current_nsid`. If
    ///   `current_nsid` is `None` the NSID is the empty string.
    /// - `nsid#def`: full ref with fragment.
    /// - `nsid`: full ref without fragment; the def is `"main"`.
    pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
        match ref_str.split_once('#') {
            // Nothing before the '#': the ref started with it, i.e. it is local.
            Some(("", fragment)) => Self {
                nsid: current_nsid.unwrap_or(""),
                def: fragment,
            },
            Some((nsid, def)) => Self { nsid, def },
            None => Self {
                nsid: ref_str,
                def: "main",
            },
        }
    }

    /// Get the NSID portion of the ref.
    ///
    /// The result borrows from the parsed input (`'a`), not from `self`.
    pub fn nsid(&self) -> &'a str {
        self.nsid
    }

    /// Get the def name (fragment) portion of the ref.
    ///
    /// The result borrows from the parsed input (`'a`), not from `self`.
    pub fn def(&self) -> &'a str {
        self.def
    }

    /// Check if this ref points at a non-`main` def of `current_nsid`.
    ///
    /// The decision is made from the parsed components, not from how the ref
    /// was spelled: `com.example.foo#bar` counts as local to
    /// `com.example.foo`, whereas a ref whose def is `main` never does.
    pub fn is_local(&self, current_nsid: &str) -> bool {
        self.nsid == current_nsid && self.def != "main"
    }

    /// Get the full ref string.
    ///
    /// Yields `nsid#def`, except that a `main` def is written as the bare
    /// `nsid` with no fragment.
    pub fn full_ref(&self) -> String {
        if self.def == "main" {
            self.nsid.to_string()
        } else {
            format!("{}#{}", self.nsid, self.def)
        }
    }

    /// Normalize a local ref by prepending the current NSID if needed.
    ///
    /// Returns the normalized ref string suitable for corpus lookup. Refs
    /// that do not start with `'#'` are returned unchanged.
    pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
        if ref_str.starts_with('#') {
            format!("{}{}", current_nsid, ref_str)
        } else {
            ref_str.to_string()
        }
    }
}
130
130
+
131
131
+
#[cfg(test)]
mod tests {
    use super::*;

    /// A four-segment NSID exposes its pieces, namespace, tail and parents.
    #[test]
    fn test_nsid_path_parse() {
        let parsed = NsidPath::parse("com.atproto.repo.strongRef");

        assert_eq!(parsed.segments(), &["com", "atproto", "repo", "strongRef"]);
        assert_eq!(parsed.namespace(), "com.atproto");
        assert_eq!(parsed.last_segment(), "strongRef");
        assert_eq!(parsed.parent_segments(), &["com", "atproto", "repo"]);
        assert!(!parsed.is_defs());
    }

    /// An NSID whose last segment is `defs` is recognised as a defs document.
    #[test]
    fn test_nsid_path_defs() {
        let parsed = NsidPath::parse("com.atproto.label.defs");

        assert!(parsed.is_defs());
        assert_eq!(parsed.last_segment(), "defs");
    }

    /// `#fragment` refs take their NSID from the supplied context.
    #[test]
    fn test_ref_path_local() {
        let context = "com.example.foo";
        let parsed = RefPath::parse("#option", Some(context));

        assert_eq!(parsed.nsid(), "com.example.foo");
        assert_eq!(parsed.def(), "option");
        assert!(parsed.is_local("com.example.foo"));
        assert_eq!(parsed.full_ref(), "com.example.foo#option");
    }

    /// `nsid#def` refs are split at the `#` and need no context.
    #[test]
    fn test_ref_path_with_fragment() {
        let parsed = RefPath::parse("com.example.foo#bar", None);

        assert_eq!(parsed.nsid(), "com.example.foo");
        assert_eq!(parsed.def(), "bar");
        assert!(!parsed.is_local("com.other.baz"));
        assert_eq!(parsed.full_ref(), "com.example.foo#bar");
    }

    /// A bare NSID refers to its `main` def, and `full_ref` omits the fragment.
    #[test]
    fn test_ref_path_implicit_main() {
        let parsed = RefPath::parse("com.example.foo", None);

        assert_eq!(parsed.nsid(), "com.example.foo");
        assert_eq!(parsed.def(), "main");
        assert_eq!(parsed.full_ref(), "com.example.foo");
    }

    /// `normalize` prefixes local refs and leaves full refs untouched.
    #[test]
    fn test_ref_path_normalize() {
        let from_local = RefPath::normalize("#option", "com.example.foo");
        assert_eq!(from_local, "com.example.foo#option");

        let from_full = RefPath::normalize("com.other.bar#baz", "com.example.foo");
        assert_eq!(from_full, "com.other.bar#baz");
    }
}
+3
-6
crates/jacquard-lexicon/src/codegen/output.rs
···
3
3
use quote::quote;
4
4
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
5
5
6
6
+
use super::nsid_utils::NsidPath;
6
7
use super::utils::{make_ident, sanitize_name};
7
8
use super::CodeGenerator;
8
9
···
249
250
250
251
// Collect all namespaces from the corpus (first two segments of each NSID)
251
252
for (nsid, _doc) in self.corpus.iter() {
252
252
-
let parts: Vec<_> = nsid.as_str().splitn(3, '.').collect();
253
253
-
let namespace = if parts.len() >= 2 {
254
254
-
format!("{}.{}", parts[0], parts[1])
255
255
-
} else {
256
256
-
nsid.to_string()
257
257
-
};
253
253
+
let nsid_path = NsidPath::parse(nsid.as_str());
254
254
+
let namespace = nsid_path.namespace();
258
255
all_namespaces.insert(namespace);
259
256
}
260
257
+11
-24
crates/jacquard-lexicon/src/codegen/structs.rs
···
6
6
use proc_macro2::TokenStream;
7
7
use quote::quote;
8
8
9
9
+
use super::nsid_utils::{NsidPath, RefPath};
9
10
use super::CodeGenerator;
10
11
use super::utils::{make_ident, value_to_variant_name};
11
12
···
467
468
let enum_ident = syn::Ident::new(union_name, proc_macro2::Span::call_site());
468
469
469
470
// Extract namespace prefix from current NSID (first two segments: "sh.weaver" from "sh.weaver.embed.recordWithMedia")
470
470
-
let parts: Vec<_> = current_nsid.splitn(3, '.').collect();
471
471
-
let current_namespace = if parts.len() >= 2 {
472
472
-
format!("{}.{}", parts[0], parts[1])
473
473
-
} else {
474
474
-
current_nsid.to_string()
475
475
-
};
471
471
+
let current_nsid_path = NsidPath::parse(current_nsid);
472
472
+
let current_namespace = current_nsid_path.namespace();
476
473
477
474
// First pass: collect all variant names and detect collisions
478
475
#[derive(Debug)]
···
486
483
let mut variant_infos = Vec::new();
487
484
for ref_str in refs {
488
485
// Normalize local refs (starting with #) by prepending current NSID
489
489
-
let normalized_ref = if ref_str.starts_with('#') {
490
490
-
format!("{}{}", current_nsid, ref_str)
491
491
-
} else {
492
492
-
ref_str.to_string()
493
493
-
};
486
486
+
let normalized_ref = RefPath::normalize(ref_str, current_nsid);
494
487
495
488
// Parse ref to get NSID and def name
496
496
-
let (ref_nsid_str, ref_def) =
497
497
-
if let Some((nsid, fragment)) = normalized_ref.split_once('#') {
498
498
-
(nsid, fragment)
499
499
-
} else {
500
500
-
(normalized_ref.as_str(), "main")
501
501
-
};
489
489
+
let ref_path = RefPath::parse(&normalized_ref, None);
490
490
+
let ref_nsid_str = ref_path.nsid();
491
491
+
let ref_def = ref_path.def();
502
492
503
493
// Skip unknown refs - they'll be handled by Unknown variant
504
494
if !self.corpus.ref_exists(&normalized_ref) {
···
555
545
556
546
// Track namespace dependency for foreign refs
557
547
if !info.is_current_namespace {
558
558
-
let parts: Vec<_> = info.ref_nsid.splitn(3, '.').collect();
559
559
-
let foreign_namespace = if parts.len() >= 2 {
560
560
-
format!("{}.{}", parts[0], parts[1])
561
561
-
} else {
562
562
-
info.ref_nsid.to_string()
563
563
-
};
548
548
+
let ref_nsid_path = NsidPath::parse(&info.ref_nsid);
549
549
+
let foreign_namespace = ref_nsid_path.namespace();
564
550
self.namespace_deps
565
551
.borrow_mut()
566
552
.entry(current_namespace.clone())
···
571
557
// Disambiguate: add second NSID segment prefix only to foreign refs when there's a collision
572
558
let variant_name = if has_collision && !info.is_current_namespace {
573
559
// Get second segment (namespace identifier: "bsky" from "app.bsky.embed.images")
574
574
-
let segments: Vec<&str> = info.ref_nsid.split('.').collect();
560
560
+
let ref_nsid_path = NsidPath::parse(&info.ref_nsid);
561
561
+
let segments = ref_nsid_path.segments();
575
562
let prefix = if segments.len() >= 2 {
576
563
segments[1].to_pascal_case()
577
564
} else {
+13
-17
crates/jacquard-lexicon/src/codegen/types.rs
···
4
4
use proc_macro2::TokenStream;
5
5
use quote::quote;
6
6
7
7
+
use super::nsid_utils::{NsidPath, RefPath};
7
8
use super::CodeGenerator;
8
9
9
10
impl<'c> CodeGenerator<'c> {
···
90
91
};
91
92
92
93
// Parse ref to get type name
93
93
-
let (ref_nsid, ref_def) =
94
94
-
if let Some((nsid_part, fragment)) = ref_str.split_once('#') {
95
95
-
(nsid_part, fragment)
96
96
-
} else {
97
97
-
(ref_str.as_str(), "main")
98
98
-
};
99
99
-
let ref_type_name = self.def_to_type_name(ref_nsid, ref_def);
94
94
+
let ref_path = RefPath::parse(&ref_str, None);
95
95
+
let ref_type_name = self.def_to_type_name(ref_path.nsid(), ref_path.def());
100
96
101
97
// If self-referential, keep union for indirection (variants are boxed)
102
98
if ref_type_name == parent_type_name {
···
185
181
use super::utils::sanitize_name;
186
182
use crate::error::CodegenError;
187
183
188
188
-
// Parse NSID and fragment
189
189
-
let (ref_nsid, ref_def) = if let Some((nsid, fragment)) = ref_str.split_once('#') {
190
190
-
(nsid, fragment)
191
191
-
} else {
192
192
-
(ref_str, "main")
193
193
-
};
184
184
+
// Parse ref to get NSID and def
185
185
+
let ref_path = RefPath::parse(ref_str, None);
186
186
+
let ref_nsid = ref_path.nsid();
187
187
+
let ref_def = ref_path.def();
194
188
195
189
// Check if ref exists
196
190
if !self.corpus.ref_exists(ref_str) {
···
198
192
return Ok(quote! { jacquard_common::types::value::Data<'a> });
199
193
}
200
194
195
195
+
// Parse NSID into components
196
196
+
let nsid_path = NsidPath::parse(ref_nsid);
197
197
+
let parts = nsid_path.segments();
198
198
+
let last_segment = nsid_path.last_segment();
199
199
+
201
200
// Convert NSID to module path
202
201
// com.atproto.repo.strongRef -> com_atproto::repo::strong_ref::StrongRef
203
202
// app.bsky.richtext.facet -> app_bsky::richtext::facet::Facet
204
203
// app.bsky.actor.defs#nux -> app_bsky::actor::Nux (defs go in parent module)
205
205
-
let parts: Vec<&str> = ref_nsid.split('.').collect();
206
206
-
let last_segment = parts.last().unwrap();
207
207
-
208
204
let type_name = self.def_to_type_name(ref_nsid, ref_def);
209
205
210
210
-
let path_str = if *last_segment == "defs" && parts.len() >= 3 {
206
206
+
let path_str = if nsid_path.is_defs() && parts.len() >= 3 {
211
207
// defs types go in parent module
212
208
let first_two = format!("{}_{}", sanitize_name(parts[0]), sanitize_name(parts[1]));
213
209
if parts.len() == 3 {
+7
-12
crates/jacquard-lexicon/src/codegen/xrpc.rs
···
7
7
use proc_macro2::TokenStream;
8
8
use quote::quote;
9
9
10
10
+
use super::nsid_utils::{NsidPath, RefPath};
10
11
use super::CodeGenerator;
11
12
use super::utils::make_ident;
12
13
···
230
231
let ref_str_s = ref_str.as_ref();
231
232
232
233
// Normalize local refs (starting with #) by prepending current NSID
233
233
-
let normalized_ref = if ref_str.starts_with('#') {
234
234
-
format!("{}{}", nsid, ref_str)
235
235
-
} else {
236
236
-
ref_str.to_string()
237
237
-
};
234
234
+
let normalized_ref = RefPath::normalize(ref_str, nsid);
238
235
239
236
// Parse ref to get NSID and def name
240
240
-
let (ref_nsid, ref_def) =
241
241
-
if let Some((nsid_part, fragment)) = normalized_ref.split_once('#') {
242
242
-
(nsid_part, fragment)
243
243
-
} else {
244
244
-
(normalized_ref.as_str(), "main")
245
245
-
};
237
237
+
let ref_path = RefPath::parse(&normalized_ref, None);
238
238
+
let ref_nsid = ref_path.nsid();
239
239
+
let ref_def = ref_path.def();
246
240
247
241
let variant_name = if ref_def == "main" {
248
248
-
ref_nsid.split('.').last().unwrap().to_pascal_case()
242
242
+
let ref_nsid_path = NsidPath::parse(ref_nsid);
243
243
+
ref_nsid_path.last_segment().to_pascal_case()
249
244
} else {
250
245
ref_def.to_pascal_case()
251
246
};
+4
-8
crates/jacquard-lexicon/src/corpus.rs
···
1
1
+
use crate::codegen::nsid_utils::RefPath;
1
2
use crate::error::Result;
2
3
use crate::lexicon::{LexUserType, LexiconDoc};
3
4
use jacquard_common::{into_static::IntoStatic, smol_str::SmolStr};
···
64
65
&self,
65
66
ref_str: &str,
66
67
) -> Option<(&LexiconDoc<'static>, &LexUserType<'static>)> {
67
67
-
let (nsid, def_name) = if let Some((nsid, fragment)) = ref_str.split_once('#') {
68
68
-
(nsid, fragment)
69
69
-
} else {
70
70
-
(ref_str, "main")
71
71
-
};
72
72
-
73
73
-
let doc = self.get(nsid)?;
74
74
-
let def = doc.defs.get(def_name)?;
68
68
+
let ref_path = RefPath::parse(ref_str, None);
69
69
+
let doc = self.get(ref_path.nsid())?;
70
70
+
let def = doc.defs.get(ref_path.def())?;
75
71
Some((doc, def))
76
72
}
77
73
+57
-131
crates/jacquard-lexicon/src/validation.rs
···
3
3
//! This module provides infrastructure for validating untyped `Data` values against
4
4
//! lexicon schemas, enabling partial deserialization, debugging, and schema migration.
5
5
6
6
+
use crate::codegen::nsid_utils::RefPath;
7
7
+
use crate::lexicon::{LexArrayItem, LexObjectProperty};
6
8
use crate::schema::SchemaRegistry;
7
9
use cid::Cid as IpldCid;
8
10
use dashmap::DashMap;
···
255
257
Ok(IpldCid::new_v1(0x71, multihash))
256
258
}
257
259
260
260
+
/// Trait for converting lexicon types to object properties
261
261
+
///
262
262
+
/// This enables type-safe conversion between array items and object properties
263
263
+
/// for unified validation logic.
264
264
+
trait IntoObjectProperty<'a> {
265
265
+
/// Convert this type to an equivalent object property
266
266
+
fn into_object_property(self) -> LexObjectProperty<'a>;
267
267
+
}
268
268
+
269
269
+
impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> {
270
270
+
fn into_object_property(self) -> LexObjectProperty<'a> {
271
271
+
match self {
272
272
+
LexArrayItem::String(s) => LexObjectProperty::String(s),
273
273
+
LexArrayItem::Integer(i) => LexObjectProperty::Integer(i),
274
274
+
LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b),
275
275
+
LexArrayItem::Object(o) => LexObjectProperty::Object(o),
276
276
+
LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u),
277
277
+
LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b),
278
278
+
LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c),
279
279
+
LexArrayItem::Blob(b) => LexObjectProperty::Blob(b),
280
280
+
LexArrayItem::Ref(r) => LexObjectProperty::Ref(r),
281
281
+
LexArrayItem::Union(u) => LexObjectProperty::Union(u),
282
282
+
}
283
283
+
}
284
284
+
}
285
285
+
258
286
/// Result of validating Data against a schema
259
287
///
260
288
/// Distinguishes between structural errors (type mismatches, missing fields) and
···
487
515
}
488
516
}
489
517
490
490
-
/// Normalize a ref string to (nsid, def_name)
491
491
-
fn normalize_ref(ref_str: &str, current_nsid: &str) -> (String, String) {
492
492
-
if let Some(fragment) = ref_str.strip_prefix('#') {
493
493
-
// #option -> (current_nsid, "option")
494
494
-
(current_nsid.to_string(), fragment.to_string())
495
495
-
} else if let Some((nsid, def)) = ref_str.split_once('#') {
496
496
-
// com.example.foo#bar -> ("com.example.foo", "bar")
497
497
-
(nsid.to_string(), def.to_string())
498
498
-
} else {
499
499
-
// com.example.foo -> ("com.example.foo", "main")
500
500
-
(ref_str.to_string(), "main".to_string())
501
501
-
}
502
502
-
}
503
518
504
519
/// Validate data against a lexicon def
505
520
fn validate_def(
···
720
735
721
736
// Try to match against refs
722
737
for variant_ref in &u.refs {
723
723
-
let (variant_nsid, variant_def) =
724
724
-
normalize_ref(variant_ref.as_ref(), &ctx.current_nsid);
725
725
-
let full_variant = format!("{}#{}", variant_nsid, variant_def);
738
738
+
let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid));
739
739
+
let variant_nsid = ref_path.nsid().to_string();
740
740
+
let variant_def = ref_path.def().to_string();
741
741
+
let full_variant = ref_path.full_ref();
726
742
727
743
// Match by full ref or just nsid
728
744
if type_str == full_variant || type_str == variant_nsid {
···
779
795
}
780
796
781
797
// Normalize ref
782
782
-
let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), &ctx.current_nsid);
783
783
-
let full_ref = format!("{}#{}", ref_nsid, ref_def);
798
798
+
let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid));
799
799
+
let ref_nsid = ref_path.nsid().to_string();
800
800
+
let ref_def = ref_path.def().to_string();
801
801
+
let full_ref = ref_path.full_ref();
784
802
785
803
// Cycle detection
786
804
if ctx.ref_stack.contains(&full_ref) {
···
861
879
fn validate_array_item(
862
880
path: &mut ValidationPath,
863
881
data: &Data,
864
864
-
item_schema: &crate::lexicon::LexArrayItem,
882
882
+
item_schema: &LexArrayItem,
865
883
registry: &SchemaRegistry,
866
884
ctx: &mut ValidationContext,
867
885
) -> Vec<StructuralError> {
868
868
-
use crate::lexicon::LexArrayItem;
869
869
-
870
870
-
match item_schema {
871
871
-
LexArrayItem::String(s) => validate_property(
872
872
-
path,
873
873
-
data,
874
874
-
&crate::lexicon::LexObjectProperty::String(s.clone()),
875
875
-
registry,
876
876
-
ctx,
877
877
-
),
878
878
-
LexArrayItem::Integer(i) => validate_property(
879
879
-
path,
880
880
-
data,
881
881
-
&crate::lexicon::LexObjectProperty::Integer(i.clone()),
882
882
-
registry,
883
883
-
ctx,
884
884
-
),
885
885
-
LexArrayItem::Boolean(b) => validate_property(
886
886
-
path,
887
887
-
data,
888
888
-
&crate::lexicon::LexObjectProperty::Boolean(b.clone()),
889
889
-
registry,
890
890
-
ctx,
891
891
-
),
892
892
-
LexArrayItem::Object(o) => validate_property(
893
893
-
path,
894
894
-
data,
895
895
-
&crate::lexicon::LexObjectProperty::Object(o.clone()),
896
896
-
registry,
897
897
-
ctx,
898
898
-
),
899
899
-
LexArrayItem::Unknown(u) => validate_property(
900
900
-
path,
901
901
-
data,
902
902
-
&crate::lexicon::LexObjectProperty::Unknown(u.clone()),
903
903
-
registry,
904
904
-
ctx,
905
905
-
),
906
906
-
LexArrayItem::Bytes(b) => validate_property(
907
907
-
path,
908
908
-
data,
909
909
-
&crate::lexicon::LexObjectProperty::Bytes(b.clone()),
910
910
-
registry,
911
911
-
ctx,
912
912
-
),
913
913
-
LexArrayItem::CidLink(c) => validate_property(
914
914
-
path,
915
915
-
data,
916
916
-
&crate::lexicon::LexObjectProperty::CidLink(c.clone()),
917
917
-
registry,
918
918
-
ctx,
919
919
-
),
920
920
-
LexArrayItem::Blob(b) => validate_property(
921
921
-
path,
922
922
-
data,
923
923
-
&crate::lexicon::LexObjectProperty::Blob(b.clone()),
924
924
-
registry,
925
925
-
ctx,
926
926
-
),
927
927
-
LexArrayItem::Ref(r) => validate_property(
928
928
-
path,
929
929
-
data,
930
930
-
&crate::lexicon::LexObjectProperty::Ref(r.clone()),
931
931
-
registry,
932
932
-
ctx,
933
933
-
),
934
934
-
LexArrayItem::Union(u) => validate_property(
935
935
-
path,
936
936
-
data,
937
937
-
&crate::lexicon::LexObjectProperty::Union(u.clone()),
938
938
-
registry,
939
939
-
ctx,
940
940
-
),
941
941
-
}
886
886
+
validate_property(
887
887
+
path,
888
888
+
data,
889
889
+
&item_schema.clone().into_object_property(),
890
890
+
registry,
891
891
+
ctx,
892
892
+
)
942
893
}
943
894
944
895
// ============================================================================
···
1115
1066
1116
1067
LexObjectProperty::Ref(r) => {
1117
1068
// Follow ref and check constraints
1118
1118
-
let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), current_nsid);
1069
1069
+
let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid));
1070
1070
+
let ref_nsid = ref_path.nsid();
1071
1071
+
let ref_def = ref_path.def();
1119
1072
1120
1120
-
if registry.get_def(&ref_nsid, &ref_def).is_some() {
1121
1121
-
validate_constraints_impl(path, data, &ref_nsid, &ref_def, registry)
1073
1073
+
if registry.get_def(ref_nsid, ref_def).is_some() {
1074
1074
+
validate_constraints_impl(path, data, ref_nsid, ref_def, registry)
1122
1075
} else {
1123
1076
Vec::new()
1124
1077
}
···
1256
1209
fn check_array_item_constraints(
1257
1210
path: &mut ValidationPath,
1258
1211
data: &Data,
1259
1259
-
item_schema: &crate::lexicon::LexArrayItem,
1212
1212
+
item_schema: &LexArrayItem,
1260
1213
current_nsid: &str,
1261
1214
registry: &SchemaRegistry,
1262
1215
) -> Vec<ConstraintError> {
1263
1263
-
use crate::lexicon::LexArrayItem;
1264
1264
-
1265
1265
-
match item_schema {
1266
1266
-
LexArrayItem::String(s) => check_property_constraints(
1267
1267
-
path,
1268
1268
-
data,
1269
1269
-
&crate::lexicon::LexObjectProperty::String(s.clone()),
1270
1270
-
current_nsid,
1271
1271
-
registry,
1272
1272
-
),
1273
1273
-
LexArrayItem::Integer(i) => check_property_constraints(
1274
1274
-
path,
1275
1275
-
data,
1276
1276
-
&crate::lexicon::LexObjectProperty::Integer(i.clone()),
1277
1277
-
current_nsid,
1278
1278
-
registry,
1279
1279
-
),
1280
1280
-
LexArrayItem::Object(o) => check_property_constraints(
1281
1281
-
path,
1282
1282
-
data,
1283
1283
-
&crate::lexicon::LexObjectProperty::Object(o.clone()),
1284
1284
-
current_nsid,
1285
1285
-
registry,
1286
1286
-
),
1287
1287
-
LexArrayItem::Ref(r) => check_property_constraints(
1288
1288
-
path,
1289
1289
-
data,
1290
1290
-
&crate::lexicon::LexObjectProperty::Ref(r.clone()),
1291
1291
-
current_nsid,
1292
1292
-
registry,
1293
1293
-
),
1294
1294
-
// Other array item types don't have constraints
1295
1295
-
_ => Vec::new(),
1296
1296
-
}
1216
1216
+
check_property_constraints(
1217
1217
+
path,
1218
1218
+
data,
1219
1219
+
&item_schema.clone().into_object_property(),
1220
1220
+
current_nsid,
1221
1221
+
registry,
1222
1222
+
)
1297
1223
}
1298
1224
1299
1225
#[cfg(test)]