this repo has no description
1use serde_json::Value;
2use thiserror::Error;
3
4#[derive(Debug, Error)]
5pub enum ValidationError {
6 #[error("No $type provided")]
7 MissingType,
8 #[error("Invalid $type: expected {expected}, got {actual}")]
9 TypeMismatch { expected: String, actual: String },
10 #[error("Missing required field: {0}")]
11 MissingField(String),
12 #[error("Invalid field value at {path}: {message}")]
13 InvalidField { path: String, message: String },
14 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")]
15 InvalidDatetime { path: String },
16 #[error("Invalid record: {0}")]
17 InvalidRecord(String),
18 #[error("Unknown record type: {0}")]
19 UnknownType(String),
20}
21
/// Outcome category of a record validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationStatus {
    /// A built-in validator exists for the record type and the record passed it.
    Valid,
    /// No built-in validator exists for the record type, so nothing type-specific
    /// was checked.
    Unknown,
    /// The record is invalid. `RecordValidator::validate` in this file does not
    /// return this variant; it reports failures as `Err(ValidationError)`.
    Invalid,
}
28
/// Validates JSON records against a fixed set of built-in `app.bsky.*` rules.
pub struct RecordValidator {
    /// When `true`, record types without a built-in validator are rejected
    /// instead of being reported as `ValidationStatus::Unknown`.
    require_lexicon: bool,
}
32
33impl Default for RecordValidator {
34 fn default() -> Self {
35 Self::new()
36 }
37}
38
39impl RecordValidator {
40 pub fn new() -> Self {
41 Self {
42 require_lexicon: false,
43 }
44 }
45
46 pub fn require_lexicon(mut self, require: bool) -> Self {
47 self.require_lexicon = require;
48 self
49 }
50
51 pub fn validate(
52 &self,
53 record: &Value,
54 collection: &str,
55 ) -> Result<ValidationStatus, ValidationError> {
56 let obj = record
57 .as_object()
58 .ok_or_else(|| ValidationError::InvalidRecord("Record must be an object".to_string()))?;
59 let record_type = obj
60 .get("$type")
61 .and_then(|v| v.as_str())
62 .ok_or(ValidationError::MissingType)?;
63 if record_type != collection {
64 return Err(ValidationError::TypeMismatch {
65 expected: collection.to_string(),
66 actual: record_type.to_string(),
67 });
68 }
69 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) {
70 validate_datetime(created_at, "createdAt")?;
71 }
72 match record_type {
73 "app.bsky.feed.post" => self.validate_post(obj)?,
74 "app.bsky.actor.profile" => self.validate_profile(obj)?,
75 "app.bsky.feed.like" => self.validate_like(obj)?,
76 "app.bsky.feed.repost" => self.validate_repost(obj)?,
77 "app.bsky.graph.follow" => self.validate_follow(obj)?,
78 "app.bsky.graph.block" => self.validate_block(obj)?,
79 "app.bsky.graph.list" => self.validate_list(obj)?,
80 "app.bsky.graph.listitem" => self.validate_list_item(obj)?,
81 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
82 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
83 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
84 _ => {
85 if self.require_lexicon {
86 return Err(ValidationError::UnknownType(record_type.to_string()));
87 }
88 return Ok(ValidationStatus::Unknown);
89 }
90 }
91 Ok(ValidationStatus::Valid)
92 }
93
94 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
95 if !obj.contains_key("text") {
96 return Err(ValidationError::MissingField("text".to_string()));
97 }
98 if !obj.contains_key("createdAt") {
99 return Err(ValidationError::MissingField("createdAt".to_string()));
100 }
101 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) {
102 let grapheme_count = text.chars().count();
103 if grapheme_count > 3000 {
104 return Err(ValidationError::InvalidField {
105 path: "text".to_string(),
106 message: format!("Text exceeds maximum length of 3000 characters (got {})", grapheme_count),
107 });
108 }
109 }
110 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) {
111 if langs.len() > 3 {
112 return Err(ValidationError::InvalidField {
113 path: "langs".to_string(),
114 message: "Maximum 3 languages allowed".to_string(),
115 });
116 }
117 }
118 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) {
119 if tags.len() > 8 {
120 return Err(ValidationError::InvalidField {
121 path: "tags".to_string(),
122 message: "Maximum 8 tags allowed".to_string(),
123 });
124 }
125 for (i, tag) in tags.iter().enumerate() {
126 if let Some(tag_str) = tag.as_str() {
127 if tag_str.len() > 640 {
128 return Err(ValidationError::InvalidField {
129 path: format!("tags/{}", i),
130 message: "Tag exceeds maximum length of 640 bytes".to_string(),
131 });
132 }
133 }
134 }
135 }
136 Ok(())
137 }
138
139 fn validate_profile(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
140 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
141 let grapheme_count = display_name.chars().count();
142 if grapheme_count > 640 {
143 return Err(ValidationError::InvalidField {
144 path: "displayName".to_string(),
145 message: format!("Display name exceeds maximum length of 640 characters (got {})", grapheme_count),
146 });
147 }
148 }
149 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
150 let grapheme_count = description.chars().count();
151 if grapheme_count > 2560 {
152 return Err(ValidationError::InvalidField {
153 path: "description".to_string(),
154 message: format!("Description exceeds maximum length of 2560 characters (got {})", grapheme_count),
155 });
156 }
157 }
158 Ok(())
159 }
160
161 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
162 if !obj.contains_key("subject") {
163 return Err(ValidationError::MissingField("subject".to_string()));
164 }
165 if !obj.contains_key("createdAt") {
166 return Err(ValidationError::MissingField("createdAt".to_string()));
167 }
168 self.validate_strong_ref(obj.get("subject"), "subject")?;
169 Ok(())
170 }
171
172 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
173 if !obj.contains_key("subject") {
174 return Err(ValidationError::MissingField("subject".to_string()));
175 }
176 if !obj.contains_key("createdAt") {
177 return Err(ValidationError::MissingField("createdAt".to_string()));
178 }
179 self.validate_strong_ref(obj.get("subject"), "subject")?;
180 Ok(())
181 }
182
183 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
184 if !obj.contains_key("subject") {
185 return Err(ValidationError::MissingField("subject".to_string()));
186 }
187 if !obj.contains_key("createdAt") {
188 return Err(ValidationError::MissingField("createdAt".to_string()));
189 }
190 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) {
191 if !subject.starts_with("did:") {
192 return Err(ValidationError::InvalidField {
193 path: "subject".to_string(),
194 message: "Subject must be a DID".to_string(),
195 });
196 }
197 }
198 Ok(())
199 }
200
201 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
202 if !obj.contains_key("subject") {
203 return Err(ValidationError::MissingField("subject".to_string()));
204 }
205 if !obj.contains_key("createdAt") {
206 return Err(ValidationError::MissingField("createdAt".to_string()));
207 }
208 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) {
209 if !subject.starts_with("did:") {
210 return Err(ValidationError::InvalidField {
211 path: "subject".to_string(),
212 message: "Subject must be a DID".to_string(),
213 });
214 }
215 }
216 Ok(())
217 }
218
219 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
220 if !obj.contains_key("name") {
221 return Err(ValidationError::MissingField("name".to_string()));
222 }
223 if !obj.contains_key("purpose") {
224 return Err(ValidationError::MissingField("purpose".to_string()));
225 }
226 if !obj.contains_key("createdAt") {
227 return Err(ValidationError::MissingField("createdAt".to_string()));
228 }
229 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
230 if name.is_empty() || name.len() > 64 {
231 return Err(ValidationError::InvalidField {
232 path: "name".to_string(),
233 message: "Name must be 1-64 characters".to_string(),
234 });
235 }
236 }
237 Ok(())
238 }
239
240 fn validate_list_item(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
241 if !obj.contains_key("subject") {
242 return Err(ValidationError::MissingField("subject".to_string()));
243 }
244 if !obj.contains_key("list") {
245 return Err(ValidationError::MissingField("list".to_string()));
246 }
247 if !obj.contains_key("createdAt") {
248 return Err(ValidationError::MissingField("createdAt".to_string()));
249 }
250 Ok(())
251 }
252
253 fn validate_feed_generator(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
254 if !obj.contains_key("did") {
255 return Err(ValidationError::MissingField("did".to_string()));
256 }
257 if !obj.contains_key("displayName") {
258 return Err(ValidationError::MissingField("displayName".to_string()));
259 }
260 if !obj.contains_key("createdAt") {
261 return Err(ValidationError::MissingField("createdAt".to_string()));
262 }
263 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
264 if display_name.is_empty() || display_name.len() > 240 {
265 return Err(ValidationError::InvalidField {
266 path: "displayName".to_string(),
267 message: "displayName must be 1-240 characters".to_string(),
268 });
269 }
270 }
271 Ok(())
272 }
273
274 fn validate_threadgate(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
275 if !obj.contains_key("post") {
276 return Err(ValidationError::MissingField("post".to_string()));
277 }
278 if !obj.contains_key("createdAt") {
279 return Err(ValidationError::MissingField("createdAt".to_string()));
280 }
281 Ok(())
282 }
283
284 fn validate_labeler_service(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
285 if !obj.contains_key("policies") {
286 return Err(ValidationError::MissingField("policies".to_string()));
287 }
288 if !obj.contains_key("createdAt") {
289 return Err(ValidationError::MissingField("createdAt".to_string()));
290 }
291 Ok(())
292 }
293
294 fn validate_strong_ref(&self, value: Option<&Value>, path: &str) -> Result<(), ValidationError> {
295 let obj = value
296 .and_then(|v| v.as_object())
297 .ok_or_else(|| ValidationError::InvalidField {
298 path: path.to_string(),
299 message: "Must be a strong reference object".to_string(),
300 })?;
301 if !obj.contains_key("uri") {
302 return Err(ValidationError::MissingField(format!("{}/uri", path)));
303 }
304 if !obj.contains_key("cid") {
305 return Err(ValidationError::MissingField(format!("{}/cid", path)));
306 }
307 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) {
308 if !uri.starts_with("at://") {
309 return Err(ValidationError::InvalidField {
310 path: format!("{}/uri", path),
311 message: "URI must be an at:// URI".to_string(),
312 });
313 }
314 }
315 Ok(())
316 }
317}
318
319fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> {
320 if chrono::DateTime::parse_from_rfc3339(value).is_err() {
321 return Err(ValidationError::InvalidDatetime {
322 path: path.to_string(),
323 });
324 }
325 Ok(())
326}
327
328pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> {
329 if rkey.is_empty() {
330 return Err(ValidationError::InvalidRecord("Record key cannot be empty".to_string()));
331 }
332 if rkey.len() > 512 {
333 return Err(ValidationError::InvalidRecord("Record key exceeds maximum length of 512".to_string()));
334 }
335 if rkey == "." || rkey == ".." {
336 return Err(ValidationError::InvalidRecord("Record key cannot be '.' or '..'".to_string()));
337 }
338 let valid_chars = rkey.chars().all(|c| {
339 c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~'
340 });
341 if !valid_chars {
342 return Err(ValidationError::InvalidRecord(
343 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')".to_string()
344 ));
345 }
346 Ok(())
347}
348
349pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> {
350 if collection.is_empty() {
351 return Err(ValidationError::InvalidRecord("Collection NSID cannot be empty".to_string()));
352 }
353 let parts: Vec<&str> = collection.split('.').collect();
354 if parts.len() < 3 {
355 return Err(ValidationError::InvalidRecord(
356 "Collection NSID must have at least 3 segments".to_string()
357 ));
358 }
359 for part in &parts {
360 if part.is_empty() {
361 return Err(ValidationError::InvalidRecord(
362 "Collection NSID segments cannot be empty".to_string()
363 ));
364 }
365 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
366 return Err(ValidationError::InvalidRecord(
367 "Collection NSID segments must be alphanumeric or hyphens".to_string()
368 ));
369 }
370 }
371 Ok(())
372}
373
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    const POST: &str = "app.bsky.feed.post";
    const CUSTOM: &str = "com.example.custom";

    /// A record of a type the validator has no built-in rules for.
    fn custom_record() -> Value {
        json!({
            "$type": "com.example.custom",
            "data": "test"
        })
    }

    #[test]
    fn test_validate_post() {
        let post = json!({
            "$type": "app.bsky.feed.post",
            "text": "Hello, world!",
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let status = RecordValidator::new().validate(&post, POST).unwrap();
        assert_eq!(status, ValidationStatus::Valid);
    }

    #[test]
    fn test_validate_post_missing_text() {
        let post = json!({
            "$type": "app.bsky.feed.post",
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let outcome = RecordValidator::new().validate(&post, POST);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_type_mismatch() {
        let like = json!({
            "$type": "app.bsky.feed.like",
            "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."},
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        // Validating a like against the post collection must be rejected.
        let outcome = RecordValidator::new().validate(&like, POST);
        assert!(matches!(outcome, Err(ValidationError::TypeMismatch { .. })));
    }

    #[test]
    fn test_validate_unknown_type() {
        let lenient = RecordValidator::new();
        let status = lenient.validate(&custom_record(), CUSTOM).unwrap();
        assert_eq!(status, ValidationStatus::Unknown);
    }

    #[test]
    fn test_validate_unknown_type_strict() {
        let strict = RecordValidator::new().require_lexicon(true);
        let outcome = strict.validate(&custom_record(), CUSTOM);
        assert!(matches!(outcome, Err(ValidationError::UnknownType(_))));
    }

    #[test]
    fn test_validate_record_key() {
        for accepted in &["valid-key_123", "3k2n5j2"] {
            assert!(validate_record_key(accepted).is_ok());
        }
        for rejected in &[".", "..", "", "invalid/key"] {
            assert!(validate_record_key(rejected).is_err());
        }
    }

    #[test]
    fn test_validate_collection_nsid() {
        for accepted in &["app.bsky.feed.post", "com.atproto.repo.record"] {
            assert!(validate_collection_nsid(accepted).is_ok());
        }
        for rejected in &["invalid", "a.b", ""] {
            assert!(validate_collection_nsid(rejected).is_err());
        }
    }
}
457}