a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky

feat(lexicon-doc): validate default/const/known values with the given constraints

mary.my.id 3efa7024 30ccef36

verified
+480 -40
+6
.changeset/thin-papayas-study.md
··· 1 + --- 2 + '@atcute/bluemoji': patch 3 + '@atcute/lexicon-doc': patch 4 + --- 5 + 6 + validate default/const/known values with the given constraints
+1 -1
lexdocs/bluemoji/blue.moji/collection/item.json
··· 29 29 "refs": ["com.atproto.label.defs#selfLabels"] 30 30 }, 31 31 "copyOf": { "type": "string", "format": "at-uri" }, 32 - "fallbackText": { "type": "string", "maxLength": 1, "default": "◌" } 32 + "fallbackText": { "type": "string", "maxLength": 10, "maxGraphemes": 1, "default": "◌" } 33 33 } 34 34 } 35 35 },
+4 -1
packages/definitions/bluemoji/lib/lexicons/types/blue/moji/collection/item.ts
··· 45 45 copyOf: /*#__PURE__*/ v.optional(/*#__PURE__*/ v.resourceUriString()), 46 46 createdAt: /*#__PURE__*/ v.datetimeString(), 47 47 fallbackText: /*#__PURE__*/ v.optional( 48 - /*#__PURE__*/ v.constrain(/*#__PURE__*/ v.string(), [/*#__PURE__*/ v.stringLength(0, 1)]), 48 + /*#__PURE__*/ v.constrain(/*#__PURE__*/ v.string(), [ 49 + /*#__PURE__*/ v.stringLength(0, 10), 50 + /*#__PURE__*/ v.stringGraphemes(0, 1), 51 + ]), 49 52 '◌', 50 53 ), 51 54 get formats() {
+343 -38
packages/lexicons/lexicon-doc/lib/schema.ts
import * as v from '@badrap/valita';

import { isWithinGraphemeBounds, isWithinUtf8Bounds } from './utils.js';

// tsc dislikes this schema with the amount of type expansion that happens here.
// the interface declaration allows tsc to just reference it instead of
// expanding on every type reference.

// ··· unchanged lines not shown in this view; it resumes at the tail of a declaration block:
// 	export interface $schema extends $schematype {}
// }

/** shape of the custom error passed to `v.err()` by the constraint checks below. */
type BoundIssue = { message: string; path: (string | number)[] };

/** wording used when a value violates the `minGraphemes`/`maxGraphemes` constraints. */
type GraphemeMessages = { readonly min: string; readonly max: string };

/** grapheme wording for the single-valued fields (`default`, `const`). */
const SINGLE_VALUE_GRAPHEME_MESSAGES: GraphemeMessages = {
	min: `can't be shorter than minimum grapheme count`,
	max: `can't be longer than maximum grapheme count`,
};

/** grapheme wording for the list-valued fields (`enum`, `knownValues`). */
const LISTED_VALUE_GRAPHEME_MESSAGES: GraphemeMessages = {
	min: `can't have fewer graphemes than minimum grapheme count`,
	max: `can't have more graphemes than maximum grapheme count`,
};

/**
 * checks a single integer against the declared `minimum`/`maximum`.
 * @param label noun used in the error message (`default`, `const`, `enum`)
 * @param value integer being checked
 * @param minimum inclusive lower bound
 * @param maximum inclusive upper bound
 * @param path location of the offending value within the schema object
 * @returns the issue to report, or `undefined` when the value is in range
 */
const checkIntegerBounds = (
	label: string,
	value: number,
	minimum: number,
	maximum: number,
	path: (string | number)[],
): BoundIssue | undefined => {
	if (value < minimum) {
		return { message: `${label} value can't be lower than minimum value`, path };
	}

	if (value > maximum) {
		return { message: `${label} value can't be greater than maximum value`, path };
	}

	return undefined;
};

/** resolved string constraints, with absent bounds replaced by `0` / `Infinity`. */
interface StringBounds {
	minLength: number;
	maxLength: number;
	minGraphemes: number;
	maxGraphemes: number;
}

/**
 * checks a single string against the declared length and grapheme constraints.
 * the string length is checked first (lower bound, then upper bound), followed
 * by the grapheme count.
 * @param value string being checked
 * @param bounds resolved constraints of the schema
 * @param target label for the message, path of the offending value, and the
 * grapheme wording to use
 * @returns the issue to report, or `undefined` when the value satisfies all bounds
 */
const checkStringBounds = (
	value: string,
	bounds: StringBounds,
	target: { label: string; path: (string | number)[]; graphemeMessages: GraphemeMessages },
): BoundIssue | undefined => {
	const { label, path, graphemeMessages } = target;

	const lengthBound = isWithinUtf8Bounds(value, bounds.minLength, bounds.maxLength);

	if (lengthBound === 'min') {
		return { message: `${label} value can't be shorter than minimum string length`, path };
	}

	if (lengthBound === 'max') {
		return { message: `${label} value can't be longer than maximum string length`, path };
	}

	// grapheme limits are separate constraints; they must not be checked
	// against minLength/maxLength.
	const graphemeBound = isWithinGraphemeBounds(value, bounds.minGraphemes, bounds.maxGraphemes);

	if (graphemeBound !== undefined) {
		return { message: `${label} value ${graphemeMessages[graphemeBound]}`, path };
	}

	return undefined;
};

/**
 * integer schema definition. on top of the structural check, `default`,
 * `const` and every `enum` entry must lie within `minimum`/`maximum`.
 */
const _lexInteger = v
	.object({
		type: v.literal('integer'),
		description: v.string().optional(),
		default: integer.optional(),
		minimum: integer.optional(),
		maximum: integer.optional(),
		enum: v.array(integer).optional(),
		const: integer.optional(),
	})
	.chain((input) => {
		const {
			// an absent bound must not constrain anything; in particular negative
			// values are acceptable when no minimum is declared.
			minimum = -Infinity,
			maximum = Infinity,
			const: constValue,
			default: defaultValue,
			enum: enumValues,
		} = input;

		if (minimum > maximum) {
			return v.err({
				message: `minimum value can't be greater than maximum value`,
				path: ['minimum'],
			});
		}

		if (defaultValue !== undefined) {
			const issue = checkIntegerBounds('default', defaultValue, minimum, maximum, ['default']);

			if (issue !== undefined) {
				return v.err(issue);
			}
		}

		if (constValue !== undefined) {
			const issue = checkIntegerBounds('const', constValue, minimum, maximum, ['const']);

			if (issue !== undefined) {
				return v.err(issue);
			}
		}

		if (enumValues !== undefined) {
			for (const [idx, enumValue] of enumValues.entries()) {
				const issue = checkIntegerBounds('enum', enumValue, minimum, maximum, ['enum', idx]);

				if (issue !== undefined) {
					return v.err(issue);
				}
			}
		}

		return v.ok(input);
	});

export const lexInteger = _lexInteger as lexInteger.$schema;
export interface LexInteger extends v.Infer<typeof lexInteger> {}

// ··· unchanged lines not shown in this view; it resumes at the tail of a declaration block:
// 	export interface $schema extends $schematype {}
// }

/**
 * string schema definition. on top of the structural check, `default`,
 * `const`, and every `enum`/`knownValues` entry must satisfy the declared
 * `minLength`/`maxLength` and `minGraphemes`/`maxGraphemes` constraints.
 */
const _lexString = v
	.object({
		type: v.literal('string'),
		format: lexStringFormat.optional(),
		description: v.string().optional(),
		default: v.string().optional(),
		minLength: integer.optional(),
		maxLength: integer.optional(),
		minGraphemes: integer.optional(),
		maxGraphemes: integer.optional(),
		enum: v.array(v.string()).optional(),
		const: v.string().optional(),
		knownValues: v.array(v.string()).optional(),
	})
	.chain((input) => {
		const {
			minLength = 0,
			maxLength = Infinity,
			minGraphemes = 0,
			maxGraphemes = Infinity,
			const: constValue,
			default: defaultValue,
			enum: enumValues,
			knownValues,
		} = input;

		if (minLength > maxLength) {
			return v.err({
				message: `minimum string length can't be greater than maximum string length`,
				path: ['minLength'],
			});
		}

		if (minGraphemes > maxGraphemes) {
			return v.err({
				message: `minimum grapheme count can't be greater than maximum grapheme count`,
				path: ['minGraphemes'],
			});
		}

		const bounds: StringBounds = { minLength, maxLength, minGraphemes, maxGraphemes };

		if (defaultValue !== undefined) {
			const issue = checkStringBounds(defaultValue, bounds, {
				label: 'default',
				path: ['default'],
				graphemeMessages: SINGLE_VALUE_GRAPHEME_MESSAGES,
			});

			if (issue !== undefined) {
				return v.err(issue);
			}
		}

		if (constValue !== undefined) {
			const issue = checkStringBounds(constValue, bounds, {
				label: 'const',
				path: ['const'],
				graphemeMessages: SINGLE_VALUE_GRAPHEME_MESSAGES,
			});

			if (issue !== undefined) {
				return v.err(issue);
			}
		}

		if (enumValues !== undefined) {
			for (const [idx, enumValue] of enumValues.entries()) {
				const issue = checkStringBounds(enumValue, bounds, {
					label: 'enum',
					path: ['enum', idx],
					graphemeMessages: LISTED_VALUE_GRAPHEME_MESSAGES,
				});

				if (issue !== undefined) {
					return v.err(issue);
				}
			}
		}

		if (knownValues !== undefined) {
			for (const [idx, knownValue] of knownValues.entries()) {
				const issue = checkStringBounds(knownValue, bounds, {
					label: 'known',
					// the path has to name the actual property holding the value
					path: ['knownValues', idx],
					graphemeMessages: LISTED_VALUE_GRAPHEME_MESSAGES,
				});

				if (issue !== undefined) {
					return v.err(issue);
				}
			}
		}

		return v.ok(input);
	});

export const lexString = _lexString as lexString.$schema;
export interface LexString extends v.Infer<typeof lexString> {}

// ··· unchanged lines not shown in this view; it resumes at the tail of a declaration block:
// 	export interface $schema extends $schematype {}
// }

/** bytes schema definition; rejects a `minLength` greater than `maxLength`. */
const _lexBytes = v
	.object({
		type: v.literal('bytes'),
		description: v.string().optional(),
		minLength: integer.optional(),
		maxLength: integer.optional(),
	})
	.chain((input) => {
		const { minLength = 0, maxLength = Infinity } = input;

		if (minLength > maxLength) {
			return v.err({
				message: `minimum byte length can't be greater than maximum byte length`,
				path: ['minLength'],
			});
		}

		return v.ok(input);
	});

export const lexBytes = _lexBytes as lexBytes.$schema;
export interface LexBytes extends v.Infer<typeof lexBytes> {}

// ··· unchanged lines not shown in this view; it resumes at the tail of a declaration block:
// 	export interface $schema extends $schematype {}
// }

/** array schema definition; rejects a `minLength` greater than `maxLength`. */
const _lexArray = v
	.object({
		type: v.literal('array'),
		description: v.string().optional(),
		items: v.union(lexPrimitive, lexIpldType, lexRefVariant, lexBlob),
		minLength: integer.optional(),
		maxLength: integer.optional(),
	})
	.chain((input) => {
		const { minLength = 0, maxLength = Infinity } = input;

		if (minLength > maxLength) {
			return v.err({
				message: `minimum array length can't be greater than maximum array length`,
				path: ['minLength'],
			});
		}

		return v.ok(input);
	});

export const lexArray = _lexArray as lexArray.$schema;
export interface LexArray extends v.Infer<typeof lexArray> {}

// ··· unchanged lines not shown in this view; it resumes at the tail of a declaration block:
// 	export interface $schema extends $schematype {}
// }

/**
 * array schema definition restricted to primitive items. declared in full
 * (previously `lexArray.extend({ items: lexPrimitive })`) and carries the same
 * `minLength`/`maxLength` consistency check as `_lexArray`.
 */
const _lexPrimitiveArray = v
	.object({
		type: v.literal('array'),
		description: v.string().optional(),
		items: lexPrimitive,
		minLength: integer.optional(),
		maxLength: integer.optional(),
	})
	.chain((input) => {
		const { minLength = 0, maxLength = Infinity } = input;

		if (minLength > maxLength) {
			return v.err({
				message: `minimum array length can't be greater than maximum array length`,
				path: ['minLength'],
			});
		}

		return v.ok(input);
	});

export const lexPrimitiveArray = _lexPrimitiveArray as lexPrimitiveArray.$schema;
export interface LexPrimitiveArray extends v.Infer<typeof lexPrimitiveArray> {}
+126
packages/lexicons/lexicon-doc/lib/utils.ts
const segmenter = new Intl.Segmenter();

/**
 * checks whether the UTF-8 byte length of `input` lies within `[min, max]`.
 *
 * the exact byte length is only computed when cheap estimates based on the
 * UTF-16 length can't settle the answer.
 *
 * @param input string to measure
 * @param min inclusive lower bound, in UTF-8 bytes
 * @param max inclusive upper bound, in UTF-8 bytes
 * @returns `'min'` if the string is too short, `'max'` if it is too long,
 * `undefined` if it is within bounds
 */
export const isWithinUtf8Bounds = (input: string, min = 0, max = Infinity): 'max' | 'min' | undefined => {
	// every UTF-16 code unit encodes to at least 1 and at most 3 UTF-8 bytes
	// (a surrogate pair is 2 code units for 4 bytes).
	const utf16Len = input.length;
	const maybeUtf8Len = utf16Len * 3;

	// fail early if even the largest possible length is below the minimum
	if (maybeUtf8Len < min) {
		return 'min';
	}

	// skip the exact count only when both estimates already satisfy the bounds;
	// checking the upper estimate alone would let too-short strings through.
	if (utf16Len >= min && maybeUtf8Len <= max) {
		return undefined;
	}

	const utf8Len = getUtf8Length(input);

	if (utf8Len < min) {
		return 'min';
	}

	if (utf8Len > max) {
		return 'max';
	}

	return undefined;
};

/**
 * checks whether the grapheme count of `input` lies within `[min, max]`.
 *
 * @param input string to measure
 * @param min inclusive lower bound, in graphemes
 * @param max inclusive upper bound, in graphemes
 * @returns `'min'` if the string has too few graphemes, `'max'` if it has too
 * many, `undefined` if it is within bounds
 */
export const isWithinGraphemeBounds = (input: string, min = 0, max = Infinity): 'max' | 'min' | undefined => {
	// grapheme conversion is expensive, so we're going to do some safe naive
	// checks first: a string never has more graphemes than UTF-16 code units,
	// and a non-empty string has at least one grapheme.

	const utf16Len = input.length;

	// fail early if the UTF-16 length is already less than the minimum
	if (utf16Len < min) {
		return 'min';
	}

	// skip the exact count only when the minimum is trivially satisfied too;
	// checking the upper estimate alone would let too-short strings through.
	if (utf16Len <= max && min <= Math.min(utf16Len, 1)) {
		return undefined;
	}

	const graphemeLen = getGraphemeLength(input);

	if (graphemeLen < min) {
		return 'min';
	}

	if (graphemeLen > max) {
		return 'max';
	}

	return undefined;
};

/**
 * computes the number of bytes `str` occupies when encoded as UTF-8, without
 * actually encoding it. unpaired surrogates are counted as 3 bytes each.
 *
 * @param str string to measure
 * @returns UTF-8 byte length
 */
export const getUtf8Length = (str: string): number => {
	const len = str.length;

	let u16pos = 0;
	let u8pos = 0;

	// fast path: while the string is ASCII, one code unit is one byte.
	// breaking out of the labeled block falls through to the general loop
	// below, resuming from the first non-ASCII code unit.
	jump: if (str.charCodeAt(0) < 0x80) {
		u16pos++;
		u8pos++;

		// scan four code units at a time
		while (u16pos + 3 < len) {
			const a = str.charCodeAt(u16pos);
			const b = str.charCodeAt(u16pos + 1);
			const c = str.charCodeAt(u16pos + 2);
			const d = str.charCodeAt(u16pos + 3);

			if ((a | b | c | d) >= 0x80) {
				break jump;
			}

			u16pos += 4;
			u8pos += 4;
		}

		// scan the remaining (fewer than four) code units
		while (u16pos < len) {
			const x = str.charCodeAt(u16pos);

			if (x >= 0x80) {
				break jump;
			}

			u16pos++;
			u8pos++;
		}

		return u8pos;
	}

	while (u16pos < len) {
		const code = str.charCodeAt(u16pos);

		if (code < 0x80) {
			u16pos += 1;
			u8pos += 1;
		} else if (code < 0x800) {
			u16pos += 1;
			u8pos += 2;
		} else if (code < 0xd800 || code > 0xdbff) {
			u16pos += 1;
			u8pos += 3;
		} else {
			// high surrogate: only a following low surrogate completes a pair
			const next = u16pos + 1 < len ? str.charCodeAt(u16pos + 1) : 0;

			if (next >= 0xdc00 && next <= 0xdfff) {
				u16pos += 2;
				u8pos += 4;
			} else {
				// unpaired; counted like the 3-byte replacement character and
				// the following code unit is left to be counted on its own
				u16pos += 1;
				u8pos += 3;
			}
		}
	}

	return u8pos;
};

/**
 * counts the graphemes in `text` using the shared `Intl.Segmenter` instance.
 *
 * @param text string to measure
 * @returns number of segments reported by the segmenter
 */
export const getGraphemeLength = (text: string): number => {
	const iterator = segmenter.segment(text)[Symbol.iterator]();
	let count = 0;

	while (!iterator.next().done) {
		count++;
	}

	return count;
};