Markdown -> Semble importer
at canon 555 lines 18 kB view raw
<script lang="ts">
  import {onMount} from "svelte";
  import type {OAuthSession} from "@atproto/oauth-client-browser";
  import {listSembleRepoRecords} from "./lib/atproto/repo";
  import {buildMockAtprotoRecords} from "./lib/atproto/record-builder";
  import {createSembleRecords} from "./lib/atproto/create";
  import {mapCitoidPayloadToSembleRecord} from "./lib/metadata/citoid-mapper";
  import {UrlClassifier} from "./lib/metadata/url-classifier";
  import type {SembleUrlMetadata} from "./lib/metadata/citoid";
  import {parseMarkdownSource} from "./lib/parse/markdown";
  import {normalizeSourceUrl, isProbablyUrl} from "./lib/utils/urls";
  import type {SembleCard, SembleCollection, SembleCollectionLink} from "./lib/semble/types";
  import type {RepoRecordGroups} from "./lib/atproto/diff";
  import {initOAuthClient, startOAuthSignIn, revokeOAuthSession, getOAuthDefaults} from "./lib/atproto/oauth";

  /** Shape of the draft bundle produced by `buildMockAtprotoRecords`. */
  type DraftRecords = ReturnType<typeof buildMockAtprotoRecords>;
  /** A single draft note record inside a draft bundle. */
  type DraftNote = DraftRecords["notes"][number];

  // --- Auth state -----------------------------------------------------------
  let session: OAuthSession | null = null;
  let sessionHandle = "";
  let oauthInfo = getOAuthDefaults();
  // `authError` and `statusMessage` are the two user-visible message slots;
  // every action clears both before it starts.
  let authError = "";
  let statusMessage = "";

  const env = import.meta.env;
  let handle = "";
  const clientId = env.VITE_CLIENT_ID ?? "";
  const redirectUri = env.VITE_REDIRECT_URI ?? "";

  // --- Parsed markdown source ------------------------------------------------
  let sourceUrl = "";
  let sourceText = "";
  let parsedCards: SembleCard[] = [];
  let parsedCollections: SembleCollection[] = [];

  // --- Snapshot of what already exists in the signed-in user's repo -----------
  let repoRecords: RepoRecordGroups | null = null;
  let repoCardUrls = new Set<string>();
  let repoCollectionNames = new Set<string>();

  // --- Records staged for creation (parsed minus already-present) -------------
  let stagedCards: SembleCard[] = [];
  let stagedCollections: SembleCollection[] = [];
  let stagedLinks: SembleCollectionLink[] = [];
  let draftRecords: DraftRecords | null = null;
  // Maximum number of existing repo cards rendered in the "existing records" list.
  const repoListLimit = 50;

  // True while a network-backed action runs; disables the action buttons.
  let busy = false;

  /** Converts any thrown value into a message suitable for `authError`. */
  function describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Drops everything that was staged for creation, including the draft records. */
  function clearStaged(): void {
    stagedCards = [];
    stagedCollections = [];
    stagedLinks = [];
    draftRecords = null;
  }

  // On mount, initialise the OAuth client; when it yields a session (fresh
  // login or restored), adopt it and load the user's repo right away.
  onMount(async () => {
    try {
      const result = await initOAuthClient();
      oauthInfo = getOAuthDefaults();
      if (result.session) {
        session = result.session;
        statusMessage = result.state ? "OAuth login complete." : "OAuth session restored.";
        await loadRepo();
      }
    } catch (error) {
      authError = describeError(error);
    }
  });

  /**
   * Validates the handle and the env-provided client configuration, then
   * starts the OAuth sign-in for the entered handle. Problems are reported
   * through `authError`.
   */
  async function startOAuth() {
    authError = "";
    statusMessage = "";
    if (!handle.trim()) {
      authError = "Enter your handle first.";
      return;
    }
    if (!clientId || !redirectUri) {
      authError = "Missing client metadata or redirect URI. Check your env config.";
      return;
    }
    try {
      await startOAuthSignIn(handle.trim());
    } catch (error) {
      authError = describeError(error);
    }
  }

  /**
   * Revokes the current session (if any) and resets all per-account state.
   *
   * A failed revoke is reported in `authError` and leaves the session in
   * place, instead of escaping as an unhandled rejection from the click
   * handler.
   */
  async function signOut() {
    authError = "";
    statusMessage = "";
    if (session) {
      try {
        await revokeOAuthSession(session);
      } catch (error) {
        authError = describeError(error);
        return;
      }
    }
    session = null;
    sessionHandle = "";
    // The repo snapshot and staged drafts belong to the account that just
    // signed out (drafts embed its DID), so they must not stay on screen.
    repoRecords = null;
    repoCardUrls = new Set<string>();
    repoCollectionNames = new Set<string>();
    clearStaged();
  }

  /**
   * Reads the markdown source (fetched from `sourceUrl` when it looks like a
   * URL, otherwise the pasted `sourceText`), parses it into cards and
   * collections, and stages the new records when a session is available.
   */
  async function fetchSource() {
    statusMessage = "";
    authError = "";
    parsedCards = [];
    parsedCollections = [];
    clearStaged();

    const trimmedUrl = sourceUrl.trim();
    const hasUrl = trimmedUrl.length > 0 && isProbablyUrl(trimmedUrl);
    if (!hasUrl && !sourceText.trim()) {
      authError = "Provide a source URL or paste text.";
      return;
    }

    busy = true;
    try {
      let text: string;
      let sourceId = "manual-input";
      if (hasUrl) {
        const normalized = normalizeSourceUrl(trimmedUrl);
        sourceId = normalized;
        const response = await fetch(normalized);
        if (!response.ok) {
          throw new Error(`Source fetch failed: ${response.status}`);
        }
        text = await response.text();
      } else {
        text = sourceText;
      }

      const parsed = parseMarkdownSource(sourceId, text);
      parsedCards = parsed.cards;
      parsedCollections = parsed.collections;

      if (!session) {
        statusMessage = `Parsed ${parsedCards.length} cards. Sign in to stage new records.`;
      } else if (parsedCards.length === 0) {
        // Nothing to stage. Report the parse result rather than letting
        // stageRecords() answer with "Parse a source first." right after a parse.
        statusMessage = `Parsed ${parsedCards.length} cards.`;
      } else {
        // stageRecords() loads the repo itself when no snapshot exists yet.
        await stageRecords(false);
      }
    } catch (error) {
      authError = describeError(error);
    } finally {
      busy = false;
    }
  }

  /**
   * Loads the repo snapshot, reporting failures through `authError`.
   *
   * @param withBusy When true (default) the `busy` flag is raised for the
   *   duration of the call; pass false when the caller already manages it.
   */
  async function loadRepo(withBusy = true) {
    if (withBusy) {
      busy = true;
    }
    try {
      await loadRepoInternal();
    } catch (error) {
      authError = describeError(error);
    } finally {
      if (withBusy) {
        busy = false;
      }
    }
  }

  /**
   * Fetches the signed-in user's existing records and handle and stores them
   * in the repo snapshot state. Errors from the underlying calls propagate to
   * the caller; a missing session is reported via `authError` instead.
   */
  async function loadRepoInternal() {
    authError = "";
    statusMessage = "";
    if (!session) {
      authError = "Sign in before loading your repo.";
      return;
    }

    const repo = session.sub;
    const agent = await getAgent(session);
    const result = await listSembleRepoRecords({agent, repo});
    repoRecords = result.records;
    repoCardUrls = extractRepoCardUrls(result.records);
    repoCollectionNames = extractRepoCollectionNames(result.records);
    const describe = await agent.com.atproto.repo.describeRepo({repo});
    sessionHandle = describe.data.handle ?? "";
    statusMessage = `Repo loaded: ${result.summary.cards} cards, ${result.summary.collections} collections.`;
  }

  /**
   * Computes which parsed cards and collections are not yet in the repo,
   * enriches the new cards with metadata, and builds the draft records that
   * `createRecords` will submit.
   *
   * @param withBusy When true (default) the `busy` flag is raised for the
   *   duration of the call; pass false when the caller already manages it.
   */
  async function stageRecords(withBusy = true) {
    authError = "";
    statusMessage = "";

    if (!parsedCards.length) {
      authError = "Parse a source first.";
      return;
    }
    if (!session) {
      authError = "Load your repo before staging records.";
      return;
    }

    if (withBusy) {
      busy = true;
    }
    try {
      if (!repoRecords) {
        await loadRepoInternal();
      }
      // Only cards whose URL is not already present in the repo are staged.
      const newCards = parsedCards.filter(card => !repoCardUrls.has(card.url));
      await enrichMetadata(newCards);

      const collectionLinks: SembleCollectionLink[] = [];
      for (const card of newCards) {
        if (!card.collection) continue;
        collectionLinks.push({
          collectionName: card.collection,
          cardId: card.id
        });
      }

      const usedCollectionNames = new Set(
        newCards
          .map(card => card.collection)
          .filter((name): name is string => Boolean(name))
      );
      // A collection is staged only if a new card uses it and the repo does
      // not already contain a collection with that name.
      const collectionList = parsedCollections.filter(collection =>
        usedCollectionNames.has(collection.name) && !repoCollectionNames.has(collection.name)
      );

      stagedCards = newCards;
      stagedCollections = collectionList;
      stagedLinks = collectionLinks;

      draftRecords = buildMockAtprotoRecords({
        cards: stagedCards,
        collections: stagedCollections,
        collectionLinks: stagedLinks,
        did: session.sub
      });

      statusMessage = `Staged ${stagedCards.length} cards, ${stagedCollections.length} collections.`;
    } catch (error) {
      authError = describeError(error);
    } finally {
      if (withBusy) {
        busy = false;
      }
    }
  }

  /**
   * Submits the staged draft records.
   *
   * After the create call the repo snapshot is refreshed so later staging and
   * retries diff against what now exists. On full success the staged state is
   * cleared, so a second click cannot re-submit the same drafts.
   */
  async function createRecords() {
    authError = "";
    statusMessage = "";
    if (!session || !draftRecords) {
      authError = "Stage records before creating.";
      return;
    }

    busy = true;
    try {
      const agent = await getAgent(session);
      const result = await createSembleRecords(agent, session.sub, draftRecords, repoRecords ?? undefined);

      // Best-effort refresh: a failure here must not mask the create result.
      try {
        await loadRepoInternal();
      } catch (error) {
        console.warn("Repo refresh after create failed", error);
      }

      if (result.failures.length > 0) {
        // Keep the staged drafts so the user can inspect and retry.
        statusMessage = "";
        authError = `Created ${result.created} records, ${result.failures.length} failed.`;
      } else {
        clearStaged();
        authError = "";
        statusMessage = `Created ${result.created} records in Semble.`;
      }
    } catch (error) {
      authError = describeError(error);
    } finally {
      busy = false;
    }
  }

  /**
   * Fills in missing title/metadata on each card from the citation lookup and
   * applies the URL classification. Cards are mutated in place.
   *
   * Enrichment is best-effort: a lookup that throws (network or JSON error)
   * is logged and skipped so one bad URL cannot abort staging of the rest.
   * Lookups run one at a time.
   */
  async function enrichMetadata(cards: SembleCard[]) {
    for (const card of cards) {
      if (!card.title || !card.metadata) {
        try {
          const citoid = await fetchCitoidMetadata(card.url);
          if (citoid?.metadata) {
            card.metadata = mergeMetadata(card.metadata, citoid.metadata);
          }
          if (citoid?.title && !card.title) {
            card.title = citoid.title;
          }
          if (citoid?.raw) {
            console.debug("Citoid response", {url: card.url, raw: citoid.raw});
          }
        } catch (error) {
          console.warn("Citoid lookup failed", {url: card.url, error});
        }
      }

      applyUrlClassification(card);
    }
  }

  /**
   * Looks up citation metadata for `url` via the Wikipedia REST citation
   * endpoint.
   *
   * @returns The mapped title/metadata/raw payload, or `undefined` when the
   *   response is not OK or the payload cannot be mapped.
   */
  async function fetchCitoidMetadata(
    url: string
  ): Promise<{title?: string; metadata?: SembleUrlMetadata; raw?: unknown} | undefined> {
    const encoded = encodeURIComponent(url);
    const response = await fetch(`https://en.wikipedia.org/api/rest_v1/data/citation/zotero/${encoded}`);
    if (!response.ok) return undefined;
    const data = (await response.json()) as unknown;
    const record = mapCitoidPayloadToSembleRecord(data);
    if (!record) return undefined;

    return {
      title: record.metadata.title?.trim(),
      metadata: record.metadata,
      raw: record.raw
    };
  }

  /**
   * Merges looked-up metadata into existing metadata. Fields already present
   * on `existing` win over `incoming`.
   */
  function mergeMetadata(
    existing: SembleUrlMetadata | undefined,
    incoming: SembleUrlMetadata
  ): SembleUrlMetadata {
    if (!existing) return incoming;
    return {
      ...incoming,
      ...existing
    };
  }

  /**
   * Sets `card.metadata.type` from the URL classifier when it recognises the
   * URL, creating the metadata object if the card has none.
   */
  function applyUrlClassification(card: SembleCard): void {
    const classified = UrlClassifier.classifyUrl(card.url);
    if (!classified) return;

    if (!card.metadata) {
      card.metadata = {type: classified};
      return;
    }
    card.metadata.type = classified;
  }

  /** Collects the URLs of all existing repo cards that expose one. */
  function extractRepoCardUrls(existingRepo: RepoRecordGroups): Set<string> {
    const urls = new Set<string>();
    for (const entry of existingRepo.cards) {
      const url = extractRepoUrl(entry.value);
      if (url) urls.add(url);
    }
    return urls;
  }

  /** Collects the names of all existing repo collections that expose one. */
  function extractRepoCollectionNames(existingRepo: RepoRecordGroups): Set<string> {
    const names = new Set<string>();
    for (const entry of existingRepo.collections) {
      const name = extractRepoName(entry.value);
      if (name) names.add(name);
    }
    return names;
  }

  /**
   * Reads a card URL from an untyped record value: a non-empty top-level
   * `url` string first, otherwise a string at `content.url`.
   */
  function extractRepoUrl(value: unknown): string | undefined {
    if (!value || typeof value !== "object") return undefined;
    const record = value as {url?: unknown; content?: {url?: unknown}};
    const direct = typeof record.url === "string" ? record.url : undefined;
    if (direct) return direct;
    const nested = record.content;
    if (nested && typeof nested === "object" && typeof nested.url === "string") {
      return nested.url;
    }
    return undefined;
  }

  /** Reads a string `name` from an untyped record value, if present. */
  function extractRepoName(value: unknown): string | undefined {
    if (!value || typeof value !== "object") return undefined;
    const record = value as {name?: unknown};
    return typeof record.name === "string" ? record.name : undefined;
  }

  /** Builds an API agent for the session; `@atproto/api` is imported lazily. */
  async function getAgent(session: OAuthSession) {
    const {Agent} = await import("@atproto/api");
    return new Agent(session);
  }

  // --- Derived view state -----------------------------------------------------
  $: oauthReady = Boolean(handle.trim() && clientId && redirectUri);
  $: stagedReady = stagedCards.length > 0 || stagedCollections.length > 0;
  $: repoCardList = repoRecords
    ? repoRecords.cards.map(entry => ({
        uri: entry.uri,
        url: extractRepoUrl(entry.value),
        value: entry.value
      }))
    : [];
  $: repoCardListVisible = repoCardList.slice(0, repoListLimit);
  $: repoCardOverflow = Math.max(0, repoCardList.length - repoListLimit);
  $: stagedCollectionNames = new Set(stagedCollections.map(collection => collection.name));
  $: parsedCollectionsByName = new Map(parsedCollections.map(collection => [collection.name, collection]));
  $: stagedCardsWithoutCollection = stagedCards.filter(card => !card.collection);
  // Collection display order: parsed order first, then any names that only
  // appear on cards. A Set keeps first-seen order while removing duplicates.
  $: stagedCollectionOrder = [
    ...new Set([
      ...parsedCollections.map(collection => collection.name),
      ...stagedCards
        .map(card => card.collection)
        .filter((name): name is string => Boolean(name))
    ])
  ];
  $: stagedCollectionGroups = stagedCollectionOrder
    .map(name => ({
      name,
      description: parsedCollectionsByName.get(name)?.description,
      isNew: stagedCollectionNames.has(name),
      cards: stagedCards.filter(card => card.collection === name)
    }))
    .filter(group => group.cards.length > 0);
  $: draftCardById = new Map(
    draftRecords?.cards.map(record => [record.recordId, record]) ?? []
  );
  // Notes without a cardId are skipped; the rest are keyed by their card.
  $: draftNoteByCardId = new Map(
    (draftRecords?.notes ?? []).flatMap((record): [string, DraftNote][] =>
      record.cardId ? [[record.cardId, record]] : []
    )
  );
</script>

<div class="app-shell fade-in">
  <section class="hero">
    <div class="brand">
      <img class="brand-logo" src="/md2semble.png" alt="Markdown Semble Importer logo" />
    </div>
    <h1>Markdown → Semble Importer</h1>
  </section>

  <section class="stack">
    <div class="card">
      <h2>Connect with ATProto</h2>
      {#if session}
        <div class="notice">
          Signed in as {session.sub}{sessionHandle ? ` (${sessionHandle})` : ""}.
        </div>
        <button class="secondary" on:click={signOut}>Sign out</button>
      {:else}
        <div class="field">
          <label for="handle">Handle</label>
          <input id="handle" bind:value={handle} placeholder="alice.bsky.social" />
        </div>
        <button on:click={startOAuth} disabled={!oauthReady || busy}>Start OAuth login</button>
      {/if}
    </div>

    <div class="card">
      <h2>Markdown source</h2>
      <p>
        Use headings for collections and list items for cards. Add a note with <code> : </code> after
        the URL.
      </p>
      <pre>
# Control
- https://arxiv.org/abs/1234.5678 : Classic paper
- [A neat blog](https://example.com) : read later
      </pre>
      <div class="field">
        <label for="sourceUrl">Source URL</label>
        <input id="sourceUrl" bind:value={sourceUrl} placeholder="https://gist.github.com/..." />
      </div>
      <div class="field">
        <label for="sourceText">Or paste markdown</label>
        <textarea id="sourceText" bind:value={sourceText}></textarea>
      </div>
      <button class="secondary" on:click={fetchSource} disabled={busy}>Parse source</button>
      {#if parsedCards.length > 0}
        <div class="notice">
          Parsed {parsedCards.length} cards and {parsedCollections.length} collections.
        </div>
      {/if}
    </div>

    <div class="card">
      <h2>Load Semble repo</h2>
      <p>Fetch existing records so we only create new URLs.</p>
      <!-- Wrapped in an arrow function so the click event is not passed as `withBusy`. -->
      <button class="secondary" on:click={() => loadRepo()} disabled={!session || busy}>
        {repoRecords ? "Reload repo" : "Load repo"}
      </button>
      {#if repoRecords}
        <div class="notice">
          Repo has {repoCardUrls.size} cards, {repoCollectionNames.size} collections.
        </div>
        <details class="details">
          <summary>Show existing records</summary>
          <div class="list">
            {#if repoCardListVisible.length === 0}
              <div class="list-item">No cards found in the repo.</div>
            {:else}
              {#each repoCardListVisible as entry}
                <div class="list-item">
                  {#if entry.url}
                    <strong>{entry.url}</strong>
                  {/if}
                  <span>{entry.uri}</span>
                  <pre class="code-block">{JSON.stringify(entry.value, null, 2)}</pre>
                </div>
              {/each}
            {/if}
            {#if repoCardOverflow > 0}
              <div class="list-item">And {repoCardOverflow} more…</div>
            {/if}
          </div>
        </details>
      {/if}
    </div>

  </section>

  {#if authError}
    <div class="notice">{authError}</div>
  {/if}
  {#if statusMessage}
    <div class="notice">{statusMessage}</div>
  {/if}

  {#if stagedReady}
    <section class="stack">
      <div class="card">
        <h2>Staged records</h2>
        <div class="list">
          {#if stagedCardsWithoutCollection.length > 0}
            <div class="list-item">
              <strong>Unsorted</strong>
              <span>Cards without a collection heading.</span>
            </div>
            {#each stagedCardsWithoutCollection as card}
              {@const cardDraft = draftCardById.get(card.id)}
              {@const noteDraft = draftNoteByCardId.get(card.id)}
              <div class="list-item">
                <strong>{card.title ?? card.url}</strong>
                <span>{card.url}</span>
                {#if cardDraft}
                  <pre class="code-block">{JSON.stringify(cardDraft.record, null, 2)}</pre>
                {:else}
                  <pre class="code-block">{JSON.stringify({error: "Missing card record."}, null, 2)}</pre>
                {/if}
                {#if noteDraft}
                  <pre class="code-block">{JSON.stringify(noteDraft.record, null, 2)}</pre>
                {/if}
              </div>
            {/each}
          {/if}
          {#each stagedCollectionGroups as group}
            <div class="list-item">
              <strong>{group.name}</strong>
              {#if group.isNew}
                <span class="badge">New collection</span>
              {/if}
              {#if group.description}
                <span>{group.description}</span>
              {/if}
            </div>
            {#each group.cards as card}
              {@const cardDraft = draftCardById.get(card.id)}
              {@const noteDraft = draftNoteByCardId.get(card.id)}
              <div class="list-item">
                <strong>{card.title ?? card.url}</strong>
                <span>{card.url}</span>
                {#if cardDraft}
                  <pre class="code-block">{JSON.stringify(cardDraft.record, null, 2)}</pre>
                {:else}
                  <pre class="code-block">{JSON.stringify({error: "Missing card record."}, null, 2)}</pre>
                {/if}
                {#if noteDraft}
                  <pre class="code-block">{JSON.stringify(noteDraft.record, null, 2)}</pre>
                {/if}
              </div>
            {/each}
          {/each}
        </div>
        <button class="secondary" on:click={createRecords} disabled={!stagedReady || busy}>
          Create records
        </button>
        <div class="notice">
          Ready to create {stagedCards.length} cards, {stagedCollections.length} collections,{" "}
          {stagedLinks.length} links.
        </div>
      </div>
    </section>
  {/if}
</div>