Add import-content script for migrating static site content to ATProto PDS

+6 -1

bun.lock

··· 4 4 "workspaces": { 5 5 "": { 6 6 "name": "sitebase", 7 + "dependencies": { 8 + "@atproto/api": "^0.18.16", 9 + }, 7 10 "devDependencies": { 8 11 "@biomejs/biome": "^2.3.11", 9 12 "@types/node": "^25.0.9", ··· 66 69 67 70 "@atproto-labs/simple-store-memory": ["@atproto-labs/simple-store-memory@0.1.4", "", { "dependencies": { "@atproto-labs/simple-store": "0.3.0", "lru-cache": "^10.2.0" } }, "sha512-3mKY4dP8I7yKPFj9VKpYyCRzGJOi5CEpOLPlRhoJyLmgs3J4RzDrjn323Oakjz2Aj2JzRU/AIvWRAZVhpYNJHw=="], 68 71 69 - "@atproto/api": ["@atproto/api@0.18.14", "", { "dependencies": { "@atproto/common-web": "^0.4.12", "@atproto/lexicon": "^0.6.0", "@atproto/syntax": "^0.4.2", "@atproto/xrpc": "^0.7.7", "await-lock": "^2.2.2", "multiformats": "^9.9.0", "tlds": "^1.234.0", "zod": "^3.23.8" } }, "sha512-1pWAPbuG3RA1o8uOAwYWZOddvNjuweYOxwTvys1q/r9NCjoGkZY0uJUy1dr6LKFaDk8bjikd2O1cgsRwFfv6Fw=="], 72 + "@atproto/api": ["@atproto/api@0.18.16", "", { "dependencies": { "@atproto/common-web": "^0.4.12", "@atproto/lexicon": "^0.6.0", "@atproto/syntax": "^0.4.2", "@atproto/xrpc": "^0.7.7", "await-lock": "^2.2.2", "multiformats": "^9.9.0", "tlds": "^1.234.0", "zod": "^3.23.8" } }, "sha512-tRGKSWr83pP5CQpSboePU21pE+GqLDYy1XHae4HH4hjaT0pr5V8wNgu70kbKB0B02GVUumeDRpJnlHKD+eMzLg=="], 70 73 71 74 "@atproto/common-web": ["@atproto/common-web@0.4.12", "", { "dependencies": { "@atproto/lex-data": "0.0.8", "@atproto/lex-json": "0.0.8", "zod": "^3.23.8" } }, "sha512-3aCJemqM/fkHQrVPbTCHCdiVstKFI+2LkFLvUhO6XZP0EqUZa/rg/CIZBKTFUWu9I5iYiaEiXL9VwcDRpEevSw=="], 72 75 ··· 171 174 "wordwrap": ["wordwrap@1.0.0", "", {}, "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q=="], 172 175 173 176 "zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], 177 + 178 + "@sitebase/web/@atproto/api": ["@atproto/api@0.18.14", "", { "dependencies": { "@atproto/common-web": "^0.4.12", "@atproto/lexicon": "^0.6.0", 
"@atproto/syntax": "^0.4.2", "@atproto/xrpc": "^0.7.7", "await-lock": "^2.2.2", "multiformats": "^9.9.0", "tlds": "^1.234.0", "zod": "^3.23.8" } }, "sha512-1pWAPbuG3RA1o8uOAwYWZOddvNjuweYOxwTvys1q/r9NCjoGkZY0uJUy1dr6LKFaDk8bjikd2O1cgsRwFfv6Fw=="], 174 179 175 180 "bun-types/@types/node": ["@types/node@25.0.8", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-powIePYMmC3ibL0UJ2i2s0WIbq6cg6UyVFQxSCpaPxxzAaziRfimGivjdF943sSGV6RADVbk0Nvlm5P/FB44Zg=="], 176 181 }

+5 -1

package.json

··· 16 16 "format:check": "biome format packages/", 17 17 "lint": "biome lint packages/ --write", 18 18 "lint:check": "biome lint packages/", 19 - "typecheck": "bun run --workspaces typecheck" 19 + "typecheck": "bun run --workspaces typecheck", 20 + "import": "bun run scripts/import-content.ts" 21 + }, 22 + "dependencies": { 23 + "@atproto/api": "^0.18.16" 20 24 } 21 25 }

+361

scripts/import-content.ts

#!/usr/bin/env bun
/**
 * Import static site content to ATProto PDS as standard.site documents
 *
 * Usage:
 *   bun run scripts/import-content.ts --content-dir <path> --publication <at-uri> --identifier <handle-or-did> --password <app-password> [--pds <pds-url>] [--dry-run]
 *
 * Environment variables (alternative to CLI args):
 *   ATPROTO_IDENTIFIER - Your handle or DID
 *   ATPROTO_PASSWORD   - App password (create at https://bsky.app/settings/app-passwords)
 *   PDS_URL            - PDS endpoint (defaults to https://bsky.social)
 *
 * Every `.md` / `.html` file below the content directory becomes one record in
 * the `site.standard.document` collection. Records are written with
 * `putRecord`, so re-running the import overwrites records that have the same
 * record key instead of duplicating them.
 */

import { readdir, readFile, stat } from "node:fs/promises";
import { basename, extname, join, relative } from "node:path";
import { AtpAgent } from "@atproto/api";

const DOCUMENT_COLLECTION = "site.standard.document";

/**
 * Matches a leading frontmatter block.
 * Group 1 is the YAML text (undefined for an empty block), group 2 is the body
 * (undefined when the file ends right after the closing `---`).
 * A leading byte-order mark and CRLF line endings are tolerated.
 */
const FRONTMATTER_PATTERN = /^\uFEFF?---\r?\n(?:([\s\S]*?)\r?\n)?---(?:\r?\n([\s\S]*))?$/;

/** File extensions treated as importable content. */
const CONTENT_EXTENSION = /\.(md|html)$/;

/** The subset of frontmatter keys this importer understands. */
interface Frontmatter {
  title?: string;
  date?: string;
  tags?: string[];
  author?: { name?: string; uri?: string };
  description?: string;
  view?: string;
}

/** A content file after its frontmatter has been split from its body. */
interface ParsedDocument {
  frontmatter: Frontmatter;
  content: string;
  filePath: string;
  relativePath: string;
}

/** Remove one leading and/or one trailing quote character from a scalar. */
function stripQuotes(value: string): string {
  return value.replace(/^["']|["']$/g, "");
}

/**
 * Parse YAML-like frontmatter from markdown content.
 *
 * This is intentionally not a full YAML parser. It understands:
 *   - `key: value` scalars for title, date, description and view
 *   - `tags:` as a block list (`- item`) or an inline list (`[a, b]`)
 *   - `author:` followed by indented `name:` / `uri:` lines
 * Anything else is ignored.
 *
 * @param content Full text of the file.
 * @returns The recognised frontmatter and the remaining body. When no
 *   frontmatter block is present the body is the unmodified input.
 */
function parseFrontmatter(content: string): { frontmatter: Frontmatter; body: string } {
  const match = FRONTMATTER_PATTERN.exec(content);
  if (!match) {
    return { frontmatter: {}, body: content };
  }

  const yamlContent = match[1] ?? "";
  const body = match[2] ?? "";
  const frontmatter: Frontmatter = {};

  // Parser state: the list currently being filled (after `tags:`) and the
  // author object currently being filled (after `author:`).
  let currentArray: string[] | null = null;
  let currentAuthor: { name?: string; uri?: string } | null = null;

  for (const line of yamlContent.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // Block list item; only meaningful directly after `tags:`.
    if (trimmed.startsWith("- ")) {
      currentArray?.push(stripQuotes(trimmed.slice(2).trim()));
      continue;
    }

    // Indented key while inside the author block.
    if (currentAuthor && /^\s/.test(line)) {
      const nested = /^(\w+):\s*(.*)$/.exec(trimmed);
      if (nested) {
        const nestedValue = stripQuotes(nested[2] ?? "");
        if (nested[1] === "name") currentAuthor.name = nestedValue;
        if (nested[1] === "uri") currentAuthor.uri = nestedValue;
      }
      continue;
    }

    const kv = /^(\w+):\s*(.*)$/.exec(trimmed);
    if (!kv) continue;

    const key = kv[1];
    const value = (kv[2] ?? "").trim();

    // Any new key ends the list or author block that was open.
    currentArray = null;
    currentAuthor = null;

    if (!value) {
      // A key without a value opens a block list or a nested object.
      if (key === "tags") {
        currentArray = [];
        frontmatter.tags = currentArray;
      } else if (key === "author") {
        currentAuthor = {};
        frontmatter.author = currentAuthor;
      }
      continue;
    }

    const cleanValue = stripQuotes(value);
    switch (key) {
      case "title":
        frontmatter.title = cleanValue;
        break;
      case "date":
        frontmatter.date = cleanValue;
        break;
      case "description":
        frontmatter.description = cleanValue;
        break;
      case "view":
        frontmatter.view = cleanValue;
        break;
      case "tags":
        if (value.startsWith("[")) {
          // Inline array like tags: ["a", "b"]. Empty entries are dropped so
          // that `tags: []` yields an empty list rather than [""].
          frontmatter.tags = value
            .slice(1, -1)
            .split(",")
            .map((tag) => stripQuotes(tag.trim()))
            .filter((tag) => tag.length > 0);
        }
        break;
      default:
        break;
    }
  }

  return { frontmatter, body };
}

/**
 * Generate a record key from a file path.
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "notes-2025-03-16-grounding-questions"
 *
 * Path separators and underscores become dashes, every other character
 * outside `[a-zA-Z0-9-]` is removed, and the result is lower-cased and cut to
 * 512 characters. The result can be an empty string (for example when the
 * file name has no ASCII letters or digits); callers must handle that.
 */
function generateRkey(relativePath: string): string {
  return relativePath
    .replace(CONTENT_EXTENSION, "")
    .replace(/[/\\]/g, "-")
    .replace(/_/g, "-")
    .replace(/[^a-zA-Z0-9-]/g, "")
    .toLowerCase()
    .slice(0, 512); // ATProto rkey max length
}

/**
 * Convert file path to URL path, removing date prefix from filename.
 * e.g., "notes/2025-03-16_grounding-questions.md" -> "/notes/grounding-questions"
 *
 * `index` files map to their directory ("notes/index.md" -> "/notes",
 * "index.md" -> "/"). Backslashes are treated as path separators, matching
 * generateRkey, so paths produced by `path.relative` on Windows work too.
 */
function generatePath(relativePath: string): string {
  const segments = relativePath.replace(/\\/g, "/").replace(CONTENT_EXTENSION, "").split("/");
  const filename = segments.pop() ?? "";

  // Handle index files
  if (filename === "index") {
    const dir = segments.join("/");
    return dir ? `/${dir}` : "/";
  }

  // Remove date prefix (YYYY-MM-DD_) from filename; keep the original name if
  // stripping the prefix would leave nothing.
  const filenameWithoutDate = filename.replace(/^\d{4}-\d{2}-\d{2}_/, "") || filename;
  return `/${[...segments, filenameWithoutDate].join("/")}`;
}

/**
 * Recursively find all content files (`.md` and `.html`).
 *
 * Entries are visited in name order so that the result, and therefore the
 * import order, is the same on every run.
 *
 * @param dir Directory to scan.
 * @param baseDir Root of the scan; passed through unchanged to recursive calls.
 * @returns Paths of all matching files, each prefixed with `dir`.
 */
async function findContentFiles(dir: string, baseDir: string = dir): Promise<string[]> {
  const files: string[] = [];
  const entries = await readdir(dir, { withFileTypes: true });
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await findContentFiles(fullPath, baseDir)));
    } else if (entry.isFile() && CONTENT_EXTENSION.test(entry.name)) {
      files.push(fullPath);
    }
  }

  return files;
}

/**
 * Parse a content file into a document.
 *
 * @param filePath Path of the file to read (UTF-8).
 * @param baseDir Content root; `relativePath` is computed against it.
 */
async function parseContentFile(filePath: string, baseDir: string): Promise<ParsedDocument> {
  const content = await readFile(filePath, "utf-8");
  const { frontmatter, body } = parseFrontmatter(content);

  return {
    frontmatter,
    content: body,
    filePath,
    relativePath: relative(baseDir, filePath),
  };
}

/**
 * Create a standard.site document record.
 *
 * @param doc Parsed content file.
 * @param publicationUri Value stored in the record's `site` field.
 * @returns The record key and the record body to upload.
 */
function createDocumentRecord(
  doc: ParsedDocument,
  publicationUri: string,
): {
  rkey: string;
  record: Record<string, unknown>;
} {
  const { frontmatter, relativePath } = doc;

  const record: Record<string, unknown> = {
    $type: DOCUMENT_COLLECTION,
    site: publicationUri,
    // Fall back to the file name (without extension) when there is no title.
    title: frontmatter.title || basename(relativePath, extname(relativePath)),
    path: generatePath(relativePath),
    textContent: doc.content,
    createdAt: new Date().toISOString(),
  };

  // Add optional fields
  if (frontmatter.description) {
    record.description = frontmatter.description;
  }

  if (frontmatter.tags && frontmatter.tags.length > 0) {
    record.tags = frontmatter.tags;
  }

  if (frontmatter.date) {
    // Parse date and convert to ISO string; an unparseable date is omitted.
    // NOTE(review): confirm against the site.standard.document lexicon whether
    // publishedAt is mandatory; records without a valid date are written without it.
    const date = new Date(frontmatter.date);
    if (!Number.isNaN(date.getTime())) {
      record.publishedAt = date.toISOString();
    }
  }

  return { rkey: generateRkey(relativePath), record };
}

/**
 * Entry point: read CLI arguments, scan the content directory, and upload one
 * record per content file (or only print what would be uploaded in dry-run
 * mode). Sets a non-zero exit code when at least one upload failed.
 */
async function main(): Promise<void> {
  // Parse arguments
  const args = process.argv.slice(2);
  const getArg = (name: string): string | undefined => {
    const idx = args.indexOf(`--${name}`);
    if (idx === -1) return undefined;
    const value = args[idx + 1];
    // A following flag is not a value (e.g. `--password --dry-run`).
    return value !== undefined && !value.startsWith("--") ? value : undefined;
  };

  const contentDir = getArg("content-dir");
  const publicationUri = getArg("publication");
  const identifier = getArg("identifier") || process.env.ATPROTO_IDENTIFIER;
  const password = getArg("password") || process.env.ATPROTO_PASSWORD;
  const pdsUrl = getArg("pds") || process.env.PDS_URL || "https://bsky.social";
  const dryRun = args.includes("--dry-run");

  if (!contentDir || !publicationUri || !identifier || !password) {
    console.error(`Usage: bun run scripts/import-content.ts \\
  --content-dir <path> \\
  --publication <at-uri> \\
  --identifier <handle-or-did> \\
  --password <app-password> \\
  [--pds <pds-url>] \\
  [--dry-run]

Environment variables:
  ATPROTO_IDENTIFIER - Your handle or DID
  ATPROTO_PASSWORD   - App password
  PDS_URL            - PDS endpoint (default: https://bsky.social)
`);
    process.exit(1);
  }

  console.log(`Content directory: ${contentDir}`);
  console.log(`Publication: ${publicationUri}`);
  console.log(`PDS: ${pdsUrl}`);
  console.log(`Dry run: ${dryRun}`);
  console.log();

  // Verify content directory exists
  try {
    const stats = await stat(contentDir);
    if (!stats.isDirectory()) {
      console.error(`Error: ${contentDir} is not a directory`);
      process.exit(1);
    }
  } catch {
    console.error(`Error: ${contentDir} does not exist`);
    process.exit(1);
  }

  // Find and parse all content files
  console.log("Scanning for content files...");
  const files = await findContentFiles(contentDir);
  console.log(`Found ${files.length} files\n`);

  const documents: ParsedDocument[] = [];
  for (const file of files) {
    documents.push(await parseContentFile(file, contentDir));
  }

  // Create ATProto agent and authenticate
  const agent = new AtpAgent({ service: pdsUrl });
  let repoDid = "";

  if (!dryRun) {
    console.log(`Authenticating as ${identifier}...`);
    await agent.login({ identifier, password });
    const did = agent.session?.did;
    if (!did) {
      throw new Error("Login completed but no session DID is available");
    }
    repoDid = did;
    console.log(`Authenticated as ${repoDid}\n`);
  }

  // Process each document
  let created = 0;
  let skipped = 0;
  let failed = 0;

  // rkey -> relative path of the first file that produced it. putRecord
  // overwrites, so a second file with the same rkey would silently replace
  // the first one; such files are skipped instead.
  const seenRkeys = new Map<string, string>();

  for (const doc of documents) {
    const { rkey, record } = createDocumentRecord(doc, publicationUri);

    console.log(`Processing: ${doc.relativePath}`);
    console.log(`  Title: ${record.title}`);
    console.log(`  Path: ${record.path}`);
    console.log(`  Rkey: ${rkey}`);
    if (record.tags) console.log(`  Tags: ${(record.tags as string[]).join(", ")}`);
    if (record.publishedAt) console.log(`  Published: ${record.publishedAt}`);
    if (doc.frontmatter.date && !record.publishedAt) {
      console.log(`  Warning: could not parse date "${doc.frontmatter.date}"; publishedAt omitted`);
    }

    if (!rkey) {
      console.log(`  - Skipped: file name yields an empty record key\n`);
      skipped++;
      continue;
    }

    const firstOwner = seenRkeys.get(rkey);
    if (firstOwner !== undefined) {
      console.log(`  - Skipped: record key already used by ${firstOwner}\n`);
      skipped++;
      continue;
    }
    seenRkeys.set(rkey, doc.relativePath);

    if (dryRun) {
      console.log(`  [DRY RUN] Would create record\n`);
      created++;
      continue;
    }

    try {
      await agent.com.atproto.repo.putRecord({
        repo: repoDid,
        collection: DOCUMENT_COLLECTION,
        rkey,
        record,
      });
      console.log(`  ✓ Created\n`);
      created++;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`  ✗ Failed: ${message}\n`);
      failed++;
    }
  }

  console.log("---");
  console.log(`Summary:`);
  console.log(`  Created: ${created}`);
  console.log(`  Skipped: ${skipped}`);
  console.log(`  Failed: ${failed}`);

  // Let callers (CI, shell scripts) detect a partially failed import.
  if (failed > 0) {
    process.exitCode = 1;
  }
}

main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});