this repo has no description
1use serde_json::Value;
2use thiserror::Error;
3
4#[derive(Debug, Error)]
5pub enum ValidationError {
6 #[error("No $type provided")]
7 MissingType,
8
9 #[error("Invalid $type: expected {expected}, got {actual}")]
10 TypeMismatch { expected: String, actual: String },
11
12 #[error("Missing required field: {0}")]
13 MissingField(String),
14
15 #[error("Invalid field value at {path}: {message}")]
16 InvalidField { path: String, message: String },
17
18 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")]
19 InvalidDatetime { path: String },
20
21 #[error("Invalid record: {0}")]
22 InvalidRecord(String),
23
24 #[error("Unknown record type: {0}")]
25 UnknownType(String),
26}
27
/// Outcome of a validation pass that did not fail with a [`ValidationError`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationStatus {
    /// The record type is known and every built-in check passed.
    Valid,
    /// The record type has no built-in check, so nothing beyond the generic
    /// checks could be verified.
    Unknown,
    /// The record is invalid. The validator visible in this file reports
    /// failures through `Err(ValidationError)` and does not return this value.
    Invalid,
}
34
/// Validates records against a set of built-in, per-type checks.
///
/// Derives `Debug` and `Clone` so a configured validator can be logged,
/// duplicated, and embedded in other `#[derive(Debug)]` types.
#[derive(Debug, Clone)]
pub struct RecordValidator {
    /// When `true`, record types without a built-in check are rejected instead
    /// of being reported as unknown.
    require_lexicon: bool,
}
38
39impl Default for RecordValidator {
40 fn default() -> Self {
41 Self::new()
42 }
43}
44
45impl RecordValidator {
46 pub fn new() -> Self {
47 Self {
48 require_lexicon: false,
49 }
50 }
51
52 pub fn require_lexicon(mut self, require: bool) -> Self {
53 self.require_lexicon = require;
54 self
55 }
56
57 pub fn validate(
58 &self,
59 record: &Value,
60 collection: &str,
61 ) -> Result<ValidationStatus, ValidationError> {
62 let obj = record
63 .as_object()
64 .ok_or_else(|| ValidationError::InvalidRecord("Record must be an object".to_string()))?;
65
66 let record_type = obj
67 .get("$type")
68 .and_then(|v| v.as_str())
69 .ok_or(ValidationError::MissingType)?;
70
71 if record_type != collection {
72 return Err(ValidationError::TypeMismatch {
73 expected: collection.to_string(),
74 actual: record_type.to_string(),
75 });
76 }
77
78 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) {
79 validate_datetime(created_at, "createdAt")?;
80 }
81
82 match record_type {
83 "app.bsky.feed.post" => self.validate_post(obj)?,
84 "app.bsky.actor.profile" => self.validate_profile(obj)?,
85 "app.bsky.feed.like" => self.validate_like(obj)?,
86 "app.bsky.feed.repost" => self.validate_repost(obj)?,
87 "app.bsky.graph.follow" => self.validate_follow(obj)?,
88 "app.bsky.graph.block" => self.validate_block(obj)?,
89 "app.bsky.graph.list" => self.validate_list(obj)?,
90 "app.bsky.graph.listitem" => self.validate_list_item(obj)?,
91 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
92 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
93 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
94 _ => {
95 if self.require_lexicon {
96 return Err(ValidationError::UnknownType(record_type.to_string()));
97 }
98 return Ok(ValidationStatus::Unknown);
99 }
100 }
101
102 Ok(ValidationStatus::Valid)
103 }
104
105 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
106 if !obj.contains_key("text") {
107 return Err(ValidationError::MissingField("text".to_string()));
108 }
109
110 if !obj.contains_key("createdAt") {
111 return Err(ValidationError::MissingField("createdAt".to_string()));
112 }
113
114 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) {
115 let grapheme_count = text.chars().count();
116 if grapheme_count > 3000 {
117 return Err(ValidationError::InvalidField {
118 path: "text".to_string(),
119 message: format!("Text exceeds maximum length of 3000 characters (got {})", grapheme_count),
120 });
121 }
122 }
123
124 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) {
125 if langs.len() > 3 {
126 return Err(ValidationError::InvalidField {
127 path: "langs".to_string(),
128 message: "Maximum 3 languages allowed".to_string(),
129 });
130 }
131 }
132
133 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) {
134 if tags.len() > 8 {
135 return Err(ValidationError::InvalidField {
136 path: "tags".to_string(),
137 message: "Maximum 8 tags allowed".to_string(),
138 });
139 }
140 for (i, tag) in tags.iter().enumerate() {
141 if let Some(tag_str) = tag.as_str() {
142 if tag_str.len() > 640 {
143 return Err(ValidationError::InvalidField {
144 path: format!("tags/{}", i),
145 message: "Tag exceeds maximum length of 640 bytes".to_string(),
146 });
147 }
148 }
149 }
150 }
151
152 Ok(())
153 }
154
155 fn validate_profile(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
156 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
157 let grapheme_count = display_name.chars().count();
158 if grapheme_count > 640 {
159 return Err(ValidationError::InvalidField {
160 path: "displayName".to_string(),
161 message: format!("Display name exceeds maximum length of 640 characters (got {})", grapheme_count),
162 });
163 }
164 }
165
166 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
167 let grapheme_count = description.chars().count();
168 if grapheme_count > 2560 {
169 return Err(ValidationError::InvalidField {
170 path: "description".to_string(),
171 message: format!("Description exceeds maximum length of 2560 characters (got {})", grapheme_count),
172 });
173 }
174 }
175
176 Ok(())
177 }
178
179 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
180 if !obj.contains_key("subject") {
181 return Err(ValidationError::MissingField("subject".to_string()));
182 }
183 if !obj.contains_key("createdAt") {
184 return Err(ValidationError::MissingField("createdAt".to_string()));
185 }
186 self.validate_strong_ref(obj.get("subject"), "subject")?;
187 Ok(())
188 }
189
190 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
191 if !obj.contains_key("subject") {
192 return Err(ValidationError::MissingField("subject".to_string()));
193 }
194 if !obj.contains_key("createdAt") {
195 return Err(ValidationError::MissingField("createdAt".to_string()));
196 }
197 self.validate_strong_ref(obj.get("subject"), "subject")?;
198 Ok(())
199 }
200
201 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
202 if !obj.contains_key("subject") {
203 return Err(ValidationError::MissingField("subject".to_string()));
204 }
205 if !obj.contains_key("createdAt") {
206 return Err(ValidationError::MissingField("createdAt".to_string()));
207 }
208
209 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) {
210 if !subject.starts_with("did:") {
211 return Err(ValidationError::InvalidField {
212 path: "subject".to_string(),
213 message: "Subject must be a DID".to_string(),
214 });
215 }
216 }
217
218 Ok(())
219 }
220
221 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
222 if !obj.contains_key("subject") {
223 return Err(ValidationError::MissingField("subject".to_string()));
224 }
225 if !obj.contains_key("createdAt") {
226 return Err(ValidationError::MissingField("createdAt".to_string()));
227 }
228
229 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) {
230 if !subject.starts_with("did:") {
231 return Err(ValidationError::InvalidField {
232 path: "subject".to_string(),
233 message: "Subject must be a DID".to_string(),
234 });
235 }
236 }
237
238 Ok(())
239 }
240
241 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
242 if !obj.contains_key("name") {
243 return Err(ValidationError::MissingField("name".to_string()));
244 }
245 if !obj.contains_key("purpose") {
246 return Err(ValidationError::MissingField("purpose".to_string()));
247 }
248 if !obj.contains_key("createdAt") {
249 return Err(ValidationError::MissingField("createdAt".to_string()));
250 }
251
252 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
253 if name.is_empty() || name.len() > 64 {
254 return Err(ValidationError::InvalidField {
255 path: "name".to_string(),
256 message: "Name must be 1-64 characters".to_string(),
257 });
258 }
259 }
260
261 Ok(())
262 }
263
264 fn validate_list_item(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
265 if !obj.contains_key("subject") {
266 return Err(ValidationError::MissingField("subject".to_string()));
267 }
268 if !obj.contains_key("list") {
269 return Err(ValidationError::MissingField("list".to_string()));
270 }
271 if !obj.contains_key("createdAt") {
272 return Err(ValidationError::MissingField("createdAt".to_string()));
273 }
274 Ok(())
275 }
276
277 fn validate_feed_generator(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
278 if !obj.contains_key("did") {
279 return Err(ValidationError::MissingField("did".to_string()));
280 }
281 if !obj.contains_key("displayName") {
282 return Err(ValidationError::MissingField("displayName".to_string()));
283 }
284 if !obj.contains_key("createdAt") {
285 return Err(ValidationError::MissingField("createdAt".to_string()));
286 }
287
288 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
289 if display_name.is_empty() || display_name.len() > 240 {
290 return Err(ValidationError::InvalidField {
291 path: "displayName".to_string(),
292 message: "displayName must be 1-240 characters".to_string(),
293 });
294 }
295 }
296
297 Ok(())
298 }
299
300 fn validate_threadgate(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
301 if !obj.contains_key("post") {
302 return Err(ValidationError::MissingField("post".to_string()));
303 }
304 if !obj.contains_key("createdAt") {
305 return Err(ValidationError::MissingField("createdAt".to_string()));
306 }
307 Ok(())
308 }
309
310 fn validate_labeler_service(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
311 if !obj.contains_key("policies") {
312 return Err(ValidationError::MissingField("policies".to_string()));
313 }
314 if !obj.contains_key("createdAt") {
315 return Err(ValidationError::MissingField("createdAt".to_string()));
316 }
317 Ok(())
318 }
319
320 fn validate_strong_ref(&self, value: Option<&Value>, path: &str) -> Result<(), ValidationError> {
321 let obj = value
322 .and_then(|v| v.as_object())
323 .ok_or_else(|| ValidationError::InvalidField {
324 path: path.to_string(),
325 message: "Must be a strong reference object".to_string(),
326 })?;
327
328 if !obj.contains_key("uri") {
329 return Err(ValidationError::MissingField(format!("{}/uri", path)));
330 }
331 if !obj.contains_key("cid") {
332 return Err(ValidationError::MissingField(format!("{}/cid", path)));
333 }
334
335 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) {
336 if !uri.starts_with("at://") {
337 return Err(ValidationError::InvalidField {
338 path: format!("{}/uri", path),
339 message: "URI must be an at:// URI".to_string(),
340 });
341 }
342 }
343
344 Ok(())
345 }
346}
347
348fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> {
349 if chrono::DateTime::parse_from_rfc3339(value).is_err() {
350 return Err(ValidationError::InvalidDatetime {
351 path: path.to_string(),
352 });
353 }
354 Ok(())
355}
356
357pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> {
358 if rkey.is_empty() {
359 return Err(ValidationError::InvalidRecord("Record key cannot be empty".to_string()));
360 }
361
362 if rkey.len() > 512 {
363 return Err(ValidationError::InvalidRecord("Record key exceeds maximum length of 512".to_string()));
364 }
365
366 if rkey == "." || rkey == ".." {
367 return Err(ValidationError::InvalidRecord("Record key cannot be '.' or '..'".to_string()));
368 }
369
370 let valid_chars = rkey.chars().all(|c| {
371 c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~'
372 });
373
374 if !valid_chars {
375 return Err(ValidationError::InvalidRecord(
376 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')".to_string()
377 ));
378 }
379
380 Ok(())
381}
382
383pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> {
384 if collection.is_empty() {
385 return Err(ValidationError::InvalidRecord("Collection NSID cannot be empty".to_string()));
386 }
387
388 let parts: Vec<&str> = collection.split('.').collect();
389 if parts.len() < 3 {
390 return Err(ValidationError::InvalidRecord(
391 "Collection NSID must have at least 3 segments".to_string()
392 ));
393 }
394
395 for part in &parts {
396 if part.is_empty() {
397 return Err(ValidationError::InvalidRecord(
398 "Collection NSID segments cannot be empty".to_string()
399 ));
400 }
401 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
402 return Err(ValidationError::InvalidRecord(
403 "Collection NSID segments must be alphanumeric or hyphens".to_string()
404 ));
405 }
406 }
407
408 Ok(())
409}
410
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    const POST: &str = "app.bsky.feed.post";
    const LIKE: &str = "app.bsky.feed.like";
    const CUSTOM: &str = "com.example.custom";
    const TIMESTAMP: &str = "2024-01-01T00:00:00.000Z";

    /// A post with `text` and a well-formed `createdAt` is valid.
    #[test]
    fn test_validate_post() {
        let record = json!({
            "$type": POST,
            "text": "Hello, world!",
            "createdAt": TIMESTAMP
        });

        let status = RecordValidator::new().validate(&record, POST).unwrap();
        assert_eq!(status, ValidationStatus::Valid);
    }

    /// A post without `text` is rejected.
    #[test]
    fn test_validate_post_missing_text() {
        let record = json!({
            "$type": POST,
            "createdAt": TIMESTAMP
        });

        let outcome = RecordValidator::new().validate(&record, POST);
        assert!(outcome.is_err());
    }

    /// Validating a like against the post collection is a type mismatch.
    #[test]
    fn test_validate_type_mismatch() {
        let record = json!({
            "$type": LIKE,
            "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."},
            "createdAt": TIMESTAMP
        });

        let outcome = RecordValidator::new().validate(&record, POST);
        assert!(matches!(outcome, Err(ValidationError::TypeMismatch { .. })));
    }

    /// Without `require_lexicon`, an unrecognised type is reported as unknown.
    #[test]
    fn test_validate_unknown_type() {
        let record = json!({
            "$type": CUSTOM,
            "data": "test"
        });

        let status = RecordValidator::new().validate(&record, CUSTOM).unwrap();
        assert_eq!(status, ValidationStatus::Unknown);
    }

    /// With `require_lexicon`, an unrecognised type is an error.
    #[test]
    fn test_validate_unknown_type_strict() {
        let strict = RecordValidator::new().require_lexicon(true);
        let record = json!({
            "$type": CUSTOM,
            "data": "test"
        });

        let outcome = strict.validate(&record, CUSTOM);
        assert!(matches!(outcome, Err(ValidationError::UnknownType(_))));
    }

    /// Record keys: a couple of accepted keys and the rejected edge cases.
    #[test]
    fn test_validate_record_key() {
        for key in &["valid-key_123", "3k2n5j2"] {
            assert!(validate_record_key(key).is_ok());
        }
        for key in &[".", "..", "", "invalid/key"] {
            assert!(validate_record_key(key).is_err());
        }
    }

    /// Collection NSIDs need at least three non-empty segments.
    #[test]
    fn test_validate_collection_nsid() {
        for nsid in &["app.bsky.feed.post", "com.atproto.repo.record"] {
            assert!(validate_collection_nsid(nsid).is_ok());
        }
        for nsid in &["invalid", "a.b", ""] {
            assert!(validate_collection_nsid(nsid).is_err());
        }
    }
}