this repo has no description
1use serde_json::Value;
2use thiserror::Error;
3
4#[derive(Debug, Error)]
5pub enum ValidationError {
6 #[error("No $type provided")]
7 MissingType,
8 #[error("Invalid $type: expected {expected}, got {actual}")]
9 TypeMismatch { expected: String, actual: String },
10 #[error("Missing required field: {0}")]
11 MissingField(String),
12 #[error("Invalid field value at {path}: {message}")]
13 InvalidField { path: String, message: String },
14 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")]
15 InvalidDatetime { path: String },
16 #[error("Invalid record: {0}")]
17 InvalidRecord(String),
18 #[error("Unknown record type: {0}")]
19 UnknownType(String),
20}
21
/// Outcome category attached to a record after validation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValidationStatus {
    /// The record type is known and every implemented check passed.
    Valid,
    /// The record type has no built-in rules, so nothing beyond the generic checks ran.
    Unknown,
    /// The record failed validation.
    Invalid,
}
28
/// Validates JSON records against the built-in rules for a fixed set of
/// `app.bsky.*` record types.
///
/// `Debug` and `Clone` are derived so the validator can be logged and shared
/// between components like any other small configuration value.
#[derive(Debug, Clone)]
pub struct RecordValidator {
    /// When `true`, record types without built-in rules are rejected instead
    /// of being reported as unknown.
    require_lexicon: bool,
}
32
33impl Default for RecordValidator {
34 fn default() -> Self {
35 Self::new()
36 }
37}
38
39impl RecordValidator {
40 pub fn new() -> Self {
41 Self {
42 require_lexicon: false,
43 }
44 }
45
46 pub fn require_lexicon(mut self, require: bool) -> Self {
47 self.require_lexicon = require;
48 self
49 }
50
51 pub fn validate(
52 &self,
53 record: &Value,
54 collection: &str,
55 ) -> Result<ValidationStatus, ValidationError> {
56 let obj = record.as_object().ok_or_else(|| {
57 ValidationError::InvalidRecord("Record must be an object".to_string())
58 })?;
59 let record_type = obj
60 .get("$type")
61 .and_then(|v| v.as_str())
62 .ok_or(ValidationError::MissingType)?;
63 if record_type != collection {
64 return Err(ValidationError::TypeMismatch {
65 expected: collection.to_string(),
66 actual: record_type.to_string(),
67 });
68 }
69 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) {
70 validate_datetime(created_at, "createdAt")?;
71 }
72 match record_type {
73 "app.bsky.feed.post" => self.validate_post(obj)?,
74 "app.bsky.actor.profile" => self.validate_profile(obj)?,
75 "app.bsky.feed.like" => self.validate_like(obj)?,
76 "app.bsky.feed.repost" => self.validate_repost(obj)?,
77 "app.bsky.graph.follow" => self.validate_follow(obj)?,
78 "app.bsky.graph.block" => self.validate_block(obj)?,
79 "app.bsky.graph.list" => self.validate_list(obj)?,
80 "app.bsky.graph.listitem" => self.validate_list_item(obj)?,
81 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
82 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
83 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
84 _ => {
85 if self.require_lexicon {
86 return Err(ValidationError::UnknownType(record_type.to_string()));
87 }
88 return Ok(ValidationStatus::Unknown);
89 }
90 }
91 Ok(ValidationStatus::Valid)
92 }
93
94 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
95 if !obj.contains_key("text") {
96 return Err(ValidationError::MissingField("text".to_string()));
97 }
98 if !obj.contains_key("createdAt") {
99 return Err(ValidationError::MissingField("createdAt".to_string()));
100 }
101 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) {
102 let grapheme_count = text.chars().count();
103 if grapheme_count > 3000 {
104 return Err(ValidationError::InvalidField {
105 path: "text".to_string(),
106 message: format!(
107 "Text exceeds maximum length of 3000 characters (got {})",
108 grapheme_count
109 ),
110 });
111 }
112 }
113 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array())
114 && langs.len() > 3 {
115 return Err(ValidationError::InvalidField {
116 path: "langs".to_string(),
117 message: "Maximum 3 languages allowed".to_string(),
118 });
119 }
120 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) {
121 if tags.len() > 8 {
122 return Err(ValidationError::InvalidField {
123 path: "tags".to_string(),
124 message: "Maximum 8 tags allowed".to_string(),
125 });
126 }
127 for (i, tag) in tags.iter().enumerate() {
128 if let Some(tag_str) = tag.as_str()
129 && tag_str.len() > 640 {
130 return Err(ValidationError::InvalidField {
131 path: format!("tags/{}", i),
132 message: "Tag exceeds maximum length of 640 bytes".to_string(),
133 });
134 }
135 }
136 }
137 Ok(())
138 }
139
140 fn validate_profile(
141 &self,
142 obj: &serde_json::Map<String, Value>,
143 ) -> Result<(), ValidationError> {
144 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
145 let grapheme_count = display_name.chars().count();
146 if grapheme_count > 640 {
147 return Err(ValidationError::InvalidField {
148 path: "displayName".to_string(),
149 message: format!(
150 "Display name exceeds maximum length of 640 characters (got {})",
151 grapheme_count
152 ),
153 });
154 }
155 }
156 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
157 let grapheme_count = description.chars().count();
158 if grapheme_count > 2560 {
159 return Err(ValidationError::InvalidField {
160 path: "description".to_string(),
161 message: format!(
162 "Description exceeds maximum length of 2560 characters (got {})",
163 grapheme_count
164 ),
165 });
166 }
167 }
168 Ok(())
169 }
170
171 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
172 if !obj.contains_key("subject") {
173 return Err(ValidationError::MissingField("subject".to_string()));
174 }
175 if !obj.contains_key("createdAt") {
176 return Err(ValidationError::MissingField("createdAt".to_string()));
177 }
178 self.validate_strong_ref(obj.get("subject"), "subject")?;
179 Ok(())
180 }
181
182 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
183 if !obj.contains_key("subject") {
184 return Err(ValidationError::MissingField("subject".to_string()));
185 }
186 if !obj.contains_key("createdAt") {
187 return Err(ValidationError::MissingField("createdAt".to_string()));
188 }
189 self.validate_strong_ref(obj.get("subject"), "subject")?;
190 Ok(())
191 }
192
193 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
194 if !obj.contains_key("subject") {
195 return Err(ValidationError::MissingField("subject".to_string()));
196 }
197 if !obj.contains_key("createdAt") {
198 return Err(ValidationError::MissingField("createdAt".to_string()));
199 }
200 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str())
201 && !subject.starts_with("did:") {
202 return Err(ValidationError::InvalidField {
203 path: "subject".to_string(),
204 message: "Subject must be a DID".to_string(),
205 });
206 }
207 Ok(())
208 }
209
210 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
211 if !obj.contains_key("subject") {
212 return Err(ValidationError::MissingField("subject".to_string()));
213 }
214 if !obj.contains_key("createdAt") {
215 return Err(ValidationError::MissingField("createdAt".to_string()));
216 }
217 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str())
218 && !subject.starts_with("did:") {
219 return Err(ValidationError::InvalidField {
220 path: "subject".to_string(),
221 message: "Subject must be a DID".to_string(),
222 });
223 }
224 Ok(())
225 }
226
227 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
228 if !obj.contains_key("name") {
229 return Err(ValidationError::MissingField("name".to_string()));
230 }
231 if !obj.contains_key("purpose") {
232 return Err(ValidationError::MissingField("purpose".to_string()));
233 }
234 if !obj.contains_key("createdAt") {
235 return Err(ValidationError::MissingField("createdAt".to_string()));
236 }
237 if let Some(name) = obj.get("name").and_then(|v| v.as_str())
238 && (name.is_empty() || name.len() > 64) {
239 return Err(ValidationError::InvalidField {
240 path: "name".to_string(),
241 message: "Name must be 1-64 characters".to_string(),
242 });
243 }
244 Ok(())
245 }
246
247 fn validate_list_item(
248 &self,
249 obj: &serde_json::Map<String, Value>,
250 ) -> Result<(), ValidationError> {
251 if !obj.contains_key("subject") {
252 return Err(ValidationError::MissingField("subject".to_string()));
253 }
254 if !obj.contains_key("list") {
255 return Err(ValidationError::MissingField("list".to_string()));
256 }
257 if !obj.contains_key("createdAt") {
258 return Err(ValidationError::MissingField("createdAt".to_string()));
259 }
260 Ok(())
261 }
262
263 fn validate_feed_generator(
264 &self,
265 obj: &serde_json::Map<String, Value>,
266 ) -> Result<(), ValidationError> {
267 if !obj.contains_key("did") {
268 return Err(ValidationError::MissingField("did".to_string()));
269 }
270 if !obj.contains_key("displayName") {
271 return Err(ValidationError::MissingField("displayName".to_string()));
272 }
273 if !obj.contains_key("createdAt") {
274 return Err(ValidationError::MissingField("createdAt".to_string()));
275 }
276 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str())
277 && (display_name.is_empty() || display_name.len() > 240) {
278 return Err(ValidationError::InvalidField {
279 path: "displayName".to_string(),
280 message: "displayName must be 1-240 characters".to_string(),
281 });
282 }
283 Ok(())
284 }
285
286 fn validate_threadgate(
287 &self,
288 obj: &serde_json::Map<String, Value>,
289 ) -> Result<(), ValidationError> {
290 if !obj.contains_key("post") {
291 return Err(ValidationError::MissingField("post".to_string()));
292 }
293 if !obj.contains_key("createdAt") {
294 return Err(ValidationError::MissingField("createdAt".to_string()));
295 }
296 Ok(())
297 }
298
299 fn validate_labeler_service(
300 &self,
301 obj: &serde_json::Map<String, Value>,
302 ) -> Result<(), ValidationError> {
303 if !obj.contains_key("policies") {
304 return Err(ValidationError::MissingField("policies".to_string()));
305 }
306 if !obj.contains_key("createdAt") {
307 return Err(ValidationError::MissingField("createdAt".to_string()));
308 }
309 Ok(())
310 }
311
312 fn validate_strong_ref(
313 &self,
314 value: Option<&Value>,
315 path: &str,
316 ) -> Result<(), ValidationError> {
317 let obj =
318 value
319 .and_then(|v| v.as_object())
320 .ok_or_else(|| ValidationError::InvalidField {
321 path: path.to_string(),
322 message: "Must be a strong reference object".to_string(),
323 })?;
324 if !obj.contains_key("uri") {
325 return Err(ValidationError::MissingField(format!("{}/uri", path)));
326 }
327 if !obj.contains_key("cid") {
328 return Err(ValidationError::MissingField(format!("{}/cid", path)));
329 }
330 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str())
331 && !uri.starts_with("at://") {
332 return Err(ValidationError::InvalidField {
333 path: format!("{}/uri", path),
334 message: "URI must be an at:// URI".to_string(),
335 });
336 }
337 Ok(())
338 }
339}
340
341fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> {
342 if chrono::DateTime::parse_from_rfc3339(value).is_err() {
343 return Err(ValidationError::InvalidDatetime {
344 path: path.to_string(),
345 });
346 }
347 Ok(())
348}
349
350pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> {
351 if rkey.is_empty() {
352 return Err(ValidationError::InvalidRecord(
353 "Record key cannot be empty".to_string(),
354 ));
355 }
356 if rkey.len() > 512 {
357 return Err(ValidationError::InvalidRecord(
358 "Record key exceeds maximum length of 512".to_string(),
359 ));
360 }
361 if rkey == "." || rkey == ".." {
362 return Err(ValidationError::InvalidRecord(
363 "Record key cannot be '.' or '..'".to_string(),
364 ));
365 }
366 let valid_chars = rkey
367 .chars()
368 .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~');
369 if !valid_chars {
370 return Err(ValidationError::InvalidRecord(
371 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')"
372 .to_string(),
373 ));
374 }
375 Ok(())
376}
377
378pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> {
379 if collection.is_empty() {
380 return Err(ValidationError::InvalidRecord(
381 "Collection NSID cannot be empty".to_string(),
382 ));
383 }
384 let parts: Vec<&str> = collection.split('.').collect();
385 if parts.len() < 3 {
386 return Err(ValidationError::InvalidRecord(
387 "Collection NSID must have at least 3 segments".to_string(),
388 ));
389 }
390 for part in &parts {
391 if part.is_empty() {
392 return Err(ValidationError::InvalidRecord(
393 "Collection NSID segments cannot be empty".to_string(),
394 ));
395 }
396 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
397 return Err(ValidationError::InvalidRecord(
398 "Collection NSID segments must be alphanumeric or hyphens".to_string(),
399 ));
400 }
401 }
402 Ok(())
403}
404
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Collection names shared by the record-level tests.
    const POST: &str = "app.bsky.feed.post";
    const CUSTOM: &str = "com.example.custom";

    /// Record of a type the validator has no built-in rules for.
    fn custom_record() -> Value {
        json!({
            "$type": "com.example.custom",
            "data": "test"
        })
    }

    #[test]
    fn test_validate_post() {
        let valid_post = json!({
            "$type": "app.bsky.feed.post",
            "text": "Hello, world!",
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let status = RecordValidator::new()
            .validate(&valid_post, POST)
            .unwrap();
        assert_eq!(status, ValidationStatus::Valid);
    }

    #[test]
    fn test_validate_post_missing_text() {
        let invalid_post = json!({
            "$type": "app.bsky.feed.post",
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let outcome = RecordValidator::new().validate(&invalid_post, POST);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_type_mismatch() {
        // A like record submitted to the post collection must be rejected on type alone.
        let record = json!({
            "$type": "app.bsky.feed.like",
            "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."},
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let outcome = RecordValidator::new().validate(&record, POST);
        assert!(matches!(outcome, Err(ValidationError::TypeMismatch { .. })));
    }

    #[test]
    fn test_validate_unknown_type() {
        let status = RecordValidator::new()
            .validate(&custom_record(), CUSTOM)
            .unwrap();
        assert_eq!(status, ValidationStatus::Unknown);
    }

    #[test]
    fn test_validate_unknown_type_strict() {
        let strict = RecordValidator::new().require_lexicon(true);
        let outcome = strict.validate(&custom_record(), CUSTOM);
        assert!(matches!(outcome, Err(ValidationError::UnknownType(_))));
    }

    #[test]
    fn test_validate_record_key() {
        for key in ["valid-key_123", "3k2n5j2"] {
            assert!(validate_record_key(key).is_ok());
        }
        for key in [".", "..", "", "invalid/key"] {
            assert!(validate_record_key(key).is_err());
        }
    }

    #[test]
    fn test_validate_collection_nsid() {
        for nsid in ["app.bsky.feed.post", "com.atproto.repo.record"] {
            assert!(validate_collection_nsid(nsid).is_ok());
        }
        for nsid in ["invalid", "a.b", ""] {
            assert!(validate_collection_nsid(nsid).is_err());
        }
    }
}