···1515//! - [`derive_impl`] - Implementation functions for derive macros (used by jacquard-derive)
1616//! - [`validation`] - Runtime validation of Data against lexicon schemas
17171818+#[cfg(feature = "codegen")]
1819pub mod codegen;
2020+#[cfg(feature = "codegen")]
1921pub mod corpus;
2222+#[cfg(feature = "codegen")]
2323+#[doc(hidden)]
2024pub mod derive_impl;
2525+#[cfg(feature = "codegen")]
2126pub mod error;
2727+#[cfg(feature = "codegen")]
2228pub mod fs;
2329pub mod lexicon;
3030+pub mod ref_utils;
2431pub mod schema;
2525-pub mod union_registry;
2632pub mod validation;
+189
crates/jacquard-lexicon/src/ref_utils.rs
···11+//! Utilities for parsing and working with NSIDs and refs
/// Parsed NSID components for easier manipulation.
///
/// An `NsidPath` borrows from the NSID string it was parsed from. Every
/// string slice handed out by the accessors borrows from that original
/// string (lifetime `'a`), not from the `NsidPath` value itself, so the
/// slices remain usable after the `NsidPath` has been dropped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NsidPath<'a> {
    /// The original, unsplit NSID string.
    nsid: &'a str,
    /// `nsid` split on `'.'`. `str::split` always yields at least one item,
    /// so this vector is never empty (an empty NSID produces `[""]`).
    segments: Vec<&'a str>,
}

impl<'a> NsidPath<'a> {
    /// Parse an NSID into its component segments.
    ///
    /// The string is split on `'.'`; no validation is performed.
    pub fn parse(nsid: &'a str) -> Self {
        Self {
            nsid,
            segments: nsid.split('.').collect(),
        }
    }

    /// Get the namespace (first two segments joined with '.').
    ///
    /// Returns "com.atproto" from "com.atproto.repo.strongRef". When the NSID
    /// has fewer than two segments the whole NSID is returned unchanged.
    pub fn namespace(&self) -> String {
        match self.segments.as_slice() {
            [first, second, ..] => format!("{}.{}", first, second),
            _ => self.nsid.to_string(),
        }
    }

    /// Get the last segment of the NSID.
    ///
    /// Falls back to the whole NSID if there are no segments (which cannot
    /// happen for values built by [`NsidPath::parse`]).
    pub fn last_segment(&self) -> &'a str {
        self.segments.last().copied().unwrap_or(self.nsid)
    }

    /// Get all segments except the last.
    ///
    /// Returns an empty slice for a single-segment NSID.
    pub fn parent_segments(&self) -> &[&'a str] {
        match self.segments.split_last() {
            Some((_, parents)) => parents,
            None => &[],
        }
    }

    /// Check if this is a "defs" NSID (last segment is exactly "defs").
    pub fn is_defs(&self) -> bool {
        self.last_segment() == "defs"
    }

    /// Get all segments.
    pub fn segments(&self) -> &[&'a str] {
        &self.segments
    }

    /// Get the original NSID string.
    pub fn as_str(&self) -> &'a str {
        self.nsid
    }

    /// Get number of segments.
    pub fn len(&self) -> usize {
        self.segments.len()
    }

    /// Check if there are no segments.
    ///
    /// Provided alongside [`NsidPath::len`] for API completeness. Because
    /// splitting a string always yields at least one (possibly empty)
    /// segment, this is `false` for every value built by
    /// [`NsidPath::parse`], including `NsidPath::parse("")`.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
}
/// Parsed reference with NSID and optional fragment.
///
/// Both parts borrow from the strings passed to [`RefPath::parse`]; the
/// accessors return those borrows with the input lifetime `'a`, so they stay
/// valid after the `RefPath` itself has been dropped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RefPath<'a> {
    /// NSID portion of the ref. Empty when a local `#fragment` ref was parsed
    /// without a current NSID to resolve it against.
    nsid: &'a str,
    /// Def name (fragment) portion of the ref; "main" when no fragment was given.
    def: &'a str,
}

impl<'a> RefPath<'a> {
    /// Parse a reference string, normalizing it based on current NSID context.
    ///
    /// Three shapes are recognised:
    /// - `#def`: local ref; the NSID is taken from `current_nsid`, or is the
    ///   empty string when `current_nsid` is `None`.
    /// - `nsid#def`: full ref with an explicit fragment.
    /// - `nsid`: full ref without a fragment; the def is the implicit "main".
    ///
    /// Only the first `'#'` is treated as the separator; anything after it
    /// (including further `'#'` characters) becomes the def name.
    pub fn parse(ref_str: &'a str, current_nsid: Option<&'a str>) -> Self {
        match ref_str.split_once('#') {
            // Local ref: nothing precedes the '#', so use the context NSID.
            Some(("", fragment)) => Self {
                nsid: current_nsid.unwrap_or(""),
                def: fragment,
            },
            // Full ref with fragment: nsid#def
            Some((nsid, def)) => Self { nsid, def },
            // Full ref without fragment: nsid (implicit "main")
            None => Self {
                nsid: ref_str,
                def: "main",
            },
        }
    }

    /// Get the NSID portion of the ref.
    pub fn nsid(&self) -> &'a str {
        self.nsid
    }

    /// Get the def name (fragment) portion of the ref.
    pub fn def(&self) -> &'a str {
        self.def
    }

    /// Check whether this ref points at a non-"main" def of `current_nsid`.
    ///
    /// The ref's original spelling is not retained, so this is true for both
    /// `#foo` (parsed with `current_nsid` as context) and the fully spelled
    /// `current_nsid#foo`. Refs to the "main" def are never reported as
    /// local, even when their NSID equals `current_nsid`.
    pub fn is_local(&self, current_nsid: &str) -> bool {
        self.nsid == current_nsid && self.def != "main"
    }

    /// Get the full ref string (`nsid#def`).
    ///
    /// The fragment is omitted when the def is "main", so the result is just
    /// the NSID in that case.
    pub fn full_ref(&self) -> String {
        if self.def == "main" {
            self.nsid.to_string()
        } else {
            format!("{}#{}", self.nsid, self.def)
        }
    }

    /// Normalize a local ref by prepending the current NSID if needed.
    ///
    /// Returns the normalized ref string suitable for corpus lookup. Refs
    /// that do not start with `'#'` are returned unchanged.
    pub fn normalize(ref_str: &str, current_nsid: &str) -> String {
        if ref_str.starts_with('#') {
            format!("{}{}", current_nsid, ref_str)
        } else {
            ref_str.to_string()
        }
    }
}
// Unit tests for `NsidPath` and `RefPath`, covering the documented happy
// paths plus the edge cases: single-segment and empty NSIDs, local refs
// parsed without a context NSID, and refs to the "main" def.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nsid_path_parse() {
        let path = NsidPath::parse("com.atproto.repo.strongRef");
        assert_eq!(path.segments(), &["com", "atproto", "repo", "strongRef"]);
        assert_eq!(path.namespace(), "com.atproto");
        assert_eq!(path.last_segment(), "strongRef");
        assert_eq!(path.parent_segments(), &["com", "atproto", "repo"]);
        assert_eq!(path.as_str(), "com.atproto.repo.strongRef");
        assert_eq!(path.len(), 4);
        assert!(!path.is_empty());
        assert!(!path.is_defs());
    }

    #[test]
    fn test_nsid_path_defs() {
        let path = NsidPath::parse("com.atproto.label.defs");
        assert!(path.is_defs());
        assert_eq!(path.last_segment(), "defs");
    }

    #[test]
    fn test_nsid_path_single_segment() {
        // With fewer than two segments the namespace is the whole NSID.
        let path = NsidPath::parse("com");
        assert_eq!(path.namespace(), "com");
        assert_eq!(path.last_segment(), "com");
        assert!(path.parent_segments().is_empty());
        assert_eq!(path.len(), 1);
    }

    #[test]
    fn test_nsid_path_empty_input() {
        // Splitting an empty string still yields one (empty) segment.
        let path = NsidPath::parse("");
        assert_eq!(path.len(), 1);
        assert!(!path.is_empty());
        assert_eq!(path.last_segment(), "");
        assert_eq!(path.namespace(), "");
    }

    #[test]
    fn test_ref_path_local() {
        let ref_path = RefPath::parse("#option", Some("com.example.foo"));
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "option");
        assert!(ref_path.is_local("com.example.foo"));
        assert_eq!(ref_path.full_ref(), "com.example.foo#option");
    }

    #[test]
    fn test_ref_path_local_without_context() {
        // Without a current NSID the NSID part is left empty.
        let ref_path = RefPath::parse("#option", None);
        assert_eq!(ref_path.nsid(), "");
        assert_eq!(ref_path.def(), "option");
        assert_eq!(ref_path.full_ref(), "#option");
    }

    #[test]
    fn test_ref_path_local_main_is_not_local() {
        // Refs to "main" are never reported as local, even for the same NSID.
        let ref_path = RefPath::parse("#main", Some("com.example.foo"));
        assert!(!ref_path.is_local("com.example.foo"));
        assert_eq!(ref_path.full_ref(), "com.example.foo");
    }

    #[test]
    fn test_ref_path_with_fragment() {
        let ref_path = RefPath::parse("com.example.foo#bar", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "bar");
        assert!(!ref_path.is_local("com.other.baz"));
        assert_eq!(ref_path.full_ref(), "com.example.foo#bar");
    }

    #[test]
    fn test_ref_path_splits_on_first_hash_only() {
        let ref_path = RefPath::parse("com.example.foo#a#b", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "a#b");
    }

    #[test]
    fn test_ref_path_implicit_main() {
        let ref_path = RefPath::parse("com.example.foo", None);
        assert_eq!(ref_path.nsid(), "com.example.foo");
        assert_eq!(ref_path.def(), "main");
        assert_eq!(ref_path.full_ref(), "com.example.foo");
    }

    #[test]
    fn test_ref_path_normalize() {
        assert_eq!(
            RefPath::normalize("#option", "com.example.foo"),
            "com.example.foo#option"
        );
        assert_eq!(
            RefPath::normalize("com.other.bar#baz", "com.example.foo"),
            "com.other.bar#baz"
        );
    }
}
+2
crates/jacquard-lexicon/src/schema.rs
···7474//! - **Validation**: Runtime constraint checking via `validate()` method
75757676pub mod builder;
7777+#[cfg(feature = "codegen")]
7778pub mod from_ast;
7979+#[cfg(feature = "codegen")]
7880pub mod type_mapping;
79818082use crate::lexicon::LexiconDoc;
-337
crates/jacquard-lexicon/src/union_registry.rs
···11-use crate::corpus::LexiconCorpus;
22-use crate::lexicon::{
33- LexArrayItem, LexObjectProperty, LexUserType, LexXrpcBodySchema,
44- LexXrpcSubscriptionMessageSchema,
55-};
66-use jacquard_common::smol_str::{SmolStr, ToSmolStr};
77-use jacquard_common::{CowStr, smol_str};
88-use std::collections::{BTreeMap, BTreeSet};
99-1010-/// Information about a single union type found in the corpus
1111-#[derive(Debug, Clone)]
1212-pub struct UnionInfo {
1313- /// NSID of the lexicon containing this union
1414- pub lexicon_nsid: SmolStr,
1515- /// Name of the def containing this union (e.g., "main", "replyRef")
1616- pub def_name: SmolStr,
1717- /// Field path within the def (e.g., "embed", "properties.embed")
1818- pub field_path: CowStr<'static>,
1919- /// Refs that exist in the corpus
2020- pub known_refs: Vec<CowStr<'static>>,
2121- /// Refs that don't exist in the corpus
2222- pub unknown_refs: Vec<CowStr<'static>>,
2323- /// Whether the union is closed (default true if not specified)
2424- pub closed: bool,
2525-}
2626-2727-impl UnionInfo {
2828- /// Get the source text for this union's lexicon from the corpus
2929- pub fn get_source<'c>(&self, corpus: &'c LexiconCorpus) -> Option<&'c str> {
3030- corpus.get_source(&self.lexicon_nsid)
3131- }
3232-3333- /// Check if this union has any unknown refs
3434- pub fn has_unknown_refs(&self) -> bool {
3535- !self.unknown_refs.is_empty()
3636- }
3737-3838- /// Get all refs (known + unknown)
3939- pub fn all_refs(&self) -> impl Iterator<Item = &CowStr<'static>> {
4040- self.known_refs.iter().chain(self.unknown_refs.iter())
4141- }
4242-}
4343-4444-/// Registry of all union types found in the corpus
4545-#[derive(Debug, Clone)]
4646-pub struct UnionRegistry {
4747- /// Map from union identifier to union info
4848- /// Key is "{lexicon_nsid}#{def_name}:{field_path}"
4949- unions: BTreeMap<SmolStr, UnionInfo>,
5050-}
5151-5252-impl UnionRegistry {
5353- /// Create a new empty union registry
5454- pub fn new() -> Self {
5555- Self {
5656- unions: BTreeMap::new(),
5757- }
5858- }
5959-6060- /// Build a union registry from a corpus
6161- pub fn from_corpus(corpus: &LexiconCorpus) -> Self {
6262- let mut registry = Self::new();
6363-6464- for (nsid, doc) in corpus.iter() {
6565- for (def_name, def) in &doc.defs {
6666- registry.collect_unions_from_def(corpus, nsid, def_name, def);
6767- }
6868- }
6969-7070- registry
7171- }
7272-7373- /// Collect unions from a single def
7474- fn collect_unions_from_def(
7575- &mut self,
7676- corpus: &LexiconCorpus,
7777- nsid: &SmolStr,
7878- def_name: &SmolStr,
7979- def: &LexUserType<'static>,
8080- ) {
8181- match def {
8282- LexUserType::Record(record) => match &record.record {
8383- crate::lexicon::LexRecordRecord::Object(obj) => {
8484- self.collect_unions_from_object(corpus, nsid, def_name, "", obj);
8585- }
8686- },
8787- LexUserType::Object(obj) => {
8888- self.collect_unions_from_object(corpus, nsid, def_name, "", obj);
8989- }
9090- LexUserType::XrpcQuery(query) => {
9191- if let Some(output) = &query.output {
9292- if let Some(schema) = &output.schema {
9393- self.collect_unions_from_xrpc_body_schema(
9494- corpus, nsid, def_name, "output", schema,
9595- );
9696- }
9797- }
9898- }
9999- LexUserType::XrpcProcedure(proc) => {
100100- if let Some(input) = &proc.input {
101101- if let Some(schema) = &input.schema {
102102- self.collect_unions_from_xrpc_body_schema(
103103- corpus, nsid, def_name, "input", schema,
104104- );
105105- }
106106- }
107107- if let Some(output) = &proc.output {
108108- if let Some(schema) = &output.schema {
109109- self.collect_unions_from_xrpc_body_schema(
110110- corpus, nsid, def_name, "output", schema,
111111- );
112112- }
113113- }
114114- }
115115- LexUserType::XrpcSubscription(sub) => {
116116- if let Some(message) = &sub.message {
117117- if let Some(schema) = &message.schema {
118118- self.collect_unions_from_subscription_message_schema(
119119- corpus, nsid, def_name, "message", schema,
120120- );
121121- }
122122- }
123123- }
124124- _ => {}
125125- }
126126- }
127127-128128- /// Collect unions from an object's properties
129129- fn collect_unions_from_object(
130130- &mut self,
131131- corpus: &LexiconCorpus,
132132- nsid: &SmolStr,
133133- def_name: &SmolStr,
134134- path_prefix: &str,
135135- obj: &crate::lexicon::LexObject<'static>,
136136- ) {
137137- for (prop_name, prop) in &obj.properties {
138138- let prop_path = if path_prefix.is_empty() {
139139- prop_name.to_smolstr()
140140- } else {
141141- smol_str::format_smolstr!("{}.{}", path_prefix, prop_name)
142142- };
143143-144144- match prop {
145145- LexObjectProperty::Union(union) => {
146146- self.register_union(
147147- corpus,
148148- nsid,
149149- def_name,
150150- &prop_path,
151151- &union.refs,
152152- union.closed,
153153- );
154154- }
155155- LexObjectProperty::Array(array) => {
156156- if let LexArrayItem::Union(union) = &array.items {
157157- let array_path = format!("{}[]", prop_path);
158158- self.register_union(
159159- corpus,
160160- nsid,
161161- def_name,
162162- &array_path,
163163- &union.refs,
164164- union.closed,
165165- );
166166- }
167167- }
168168- LexObjectProperty::Ref(ref_type) => {
169169- // Check if ref points to a union
170170- if let Some((_, ref_def)) = corpus.resolve_ref(ref_type.r#ref.as_ref()) {
171171- if matches!(ref_def, LexUserType::Object(_)) {
172172- // Recursively check the referenced object
173173- // (we'll handle this in a future iteration if needed)
174174- }
175175- }
176176- }
177177- _ => {}
178178- }
179179- }
180180- }
181181-182182- /// Collect unions from XRPC body schema
183183- fn collect_unions_from_xrpc_body_schema(
184184- &mut self,
185185- corpus: &LexiconCorpus,
186186- nsid: &SmolStr,
187187- def_name: &SmolStr,
188188- path: &str,
189189- schema: &LexXrpcBodySchema<'static>,
190190- ) {
191191- match schema {
192192- LexXrpcBodySchema::Union(union) => {
193193- self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed);
194194- }
195195- LexXrpcBodySchema::Object(obj) => {
196196- self.collect_unions_from_object(corpus, nsid, def_name, path, obj);
197197- }
198198- _ => {}
199199- }
200200- }
201201-202202- /// Collect unions from subscription message schema
203203- fn collect_unions_from_subscription_message_schema(
204204- &mut self,
205205- corpus: &LexiconCorpus,
206206- nsid: &SmolStr,
207207- def_name: &SmolStr,
208208- path: &str,
209209- schema: &LexXrpcSubscriptionMessageSchema<'static>,
210210- ) {
211211- match schema {
212212- LexXrpcSubscriptionMessageSchema::Union(union) => {
213213- self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed);
214214- }
215215- LexXrpcSubscriptionMessageSchema::Object(obj) => {
216216- self.collect_unions_from_object(corpus, nsid, def_name, path, obj);
217217- }
218218- _ => {}
219219- }
220220- }
221221-222222- /// Register a union with the registry
223223- fn register_union(
224224- &mut self,
225225- corpus: &LexiconCorpus,
226226- nsid: &SmolStr,
227227- def_name: &SmolStr,
228228- field_path: &str,
229229- refs: &[jacquard_common::CowStr<'static>],
230230- closed: Option<bool>,
231231- ) {
232232- let mut known_refs = Vec::new();
233233- let mut unknown_refs = Vec::new();
234234-235235- for ref_str in refs {
236236- if corpus.ref_exists(&ref_str) {
237237- known_refs.push(ref_str.clone());
238238- } else {
239239- unknown_refs.push(ref_str.clone());
240240- }
241241- }
242242-243243- let key = smol_str::format_smolstr!("{}#{}:{}", nsid, def_name, field_path);
244244- self.unions.insert(
245245- key,
246246- UnionInfo {
247247- lexicon_nsid: nsid.clone(),
248248- def_name: def_name.clone(),
249249- field_path: CowStr::Owned(field_path.to_smolstr()),
250250- known_refs,
251251- unknown_refs,
252252- closed: closed.unwrap_or(true),
253253- },
254254- );
255255- }
256256-257257- /// Get all unions
258258- pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &UnionInfo)> {
259259- self.unions.iter()
260260- }
261261-262262- /// Get a specific union
263263- pub fn get(&self, key: &str) -> Option<&UnionInfo> {
264264- self.unions.get(key)
265265- }
266266-267267- /// Number of unions in registry
268268- pub fn len(&self) -> usize {
269269- self.unions.len()
270270- }
271271-272272- /// Check if registry is empty
273273- pub fn is_empty(&self) -> bool {
274274- self.unions.is_empty()
275275- }
276276-277277- /// Get all unique refs across all unions
278278- pub fn all_refs(&self) -> BTreeSet<CowStr<'static>> {
279279- let mut refs = BTreeSet::new();
280280- for union in self.unions.values() {
281281- refs.extend(union.known_refs.iter().cloned());
282282- refs.extend(union.unknown_refs.iter().cloned());
283283- }
284284- refs
285285- }
286286-}
287287-288288-impl Default for UnionRegistry {
289289- fn default() -> Self {
290290- Self::new()
291291- }
292292-}
293293-294294-#[cfg(test)]
295295-mod tests {
296296- use super::*;
297297-298298- #[test]
299299- fn test_union_registry_from_corpus() {
300300- let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons")
301301- .expect("failed to load lexicons");
302302-303303- let registry = UnionRegistry::from_corpus(&corpus);
304304-305305- assert!(!registry.is_empty());
306306-307307- // Check that we found the embed union in post
308308- let post_embed = registry
309309- .iter()
310310- .find(|(_, info)| {
311311- info.lexicon_nsid == "app.bsky.feed.post"
312312- && info.def_name == "main"
313313- && info.field_path.contains("embed")
314314- })
315315- .expect("should find post embed union");
316316-317317- let info = post_embed.1;
318318- assert!(info.known_refs.contains(&"app.bsky.embed.images".into()));
319319- assert!(info.known_refs.contains(&"app.bsky.embed.video".into()));
320320- assert!(info.known_refs.contains(&"app.bsky.embed.external".into()));
321321- }
322322-323323- #[test]
324324- fn test_union_registry_tracks_unknown_refs() {
325325- let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons")
326326- .expect("failed to load lexicons");
327327-328328- let registry = UnionRegistry::from_corpus(&corpus);
329329-330330- // If there are any unknown refs, they should be tracked
331331- for (_, info) in registry.iter() {
332332- for unknown in &info.unknown_refs {
333333- assert!(!corpus.ref_exists(unknown));
334334- }
335335- }
336336- }
337337-}
+1-1
crates/jacquard-lexicon/src/validation.rs
···33//! This module provides infrastructure for validating untyped `Data` values against
44//! lexicon schemas, enabling partial deserialization, debugging, and schema migration.
5566-use crate::codegen::nsid_utils::RefPath;
76use crate::lexicon::{LexArrayItem, LexObjectProperty};
77+use crate::ref_utils::RefPath;
88use crate::schema::SchemaRegistry;
99use cid::Cid as IpldCid;
1010use dashmap::DashMap;