Markdown -> Semble importer
1<script lang="ts">
2 import {onMount} from "svelte";
3 import type {OAuthSession} from "@atproto/oauth-client-browser";
4 import {listSembleRepoRecords} from "./lib/atproto/repo";
5 import {buildMockAtprotoRecords} from "./lib/atproto/record-builder";
6 import {createSembleRecords} from "./lib/atproto/create";
7 import {mapCitoidPayloadToSembleRecord} from "./lib/metadata/citoid-mapper";
8 import {UrlClassifier} from "./lib/metadata/url-classifier";
9 import type {SembleUrlMetadata} from "./lib/metadata/citoid";
10 import {parseMarkdownSource} from "./lib/parse/markdown";
11 import {normalizeSourceUrl, isProbablyUrl} from "./lib/utils/urls";
12 import type {SembleCard, SembleCollection, SembleCollectionLink} from "./lib/semble/types";
13 import type {RepoRecordGroups} from "./lib/atproto/diff";
14 import {initOAuthClient, startOAuthSignIn, revokeOAuthSession, getOAuthDefaults} from "./lib/atproto/oauth";
15
// --- Build-time configuration -------------------------------------------
// Values read from the Vite environment; both OAuth values fall back to "".
const env = import.meta.env;
const clientId = env.VITE_CLIENT_ID ?? "";
const redirectUri = env.VITE_REDIRECT_URI ?? "";
// Maximum number of existing repo cards listed in the "Show existing records" panel.
const repoListLimit = 50;

// --- Authentication -----------------------------------------------------
// Active OAuth session, or null while signed out.
let session: OAuthSession | null = null;
// Handle resolved for the signed-in repo (filled in when the repo is loaded).
let sessionHandle = "";
// Handle typed into the sign-in form.
let handle = "";
// OAuth defaults; re-read after the OAuth client has been initialised on mount.
let oauthInfo = getOAuthDefaults();

// --- User feedback ------------------------------------------------------
// Error and status texts rendered as notices below the cards.
let authError = "";
let statusMessage = "";
// True while an async action runs; used to disable the action buttons.
let busy = false;

// --- Markdown source and parse result -----------------------------------
let sourceUrl = "";
let sourceText = "";
let parsedCards: SembleCard[] = [];
let parsedCollections: SembleCollection[] = [];

// --- Existing repo contents ---------------------------------------------
let repoRecords: RepoRecordGroups | null = null;
let repoCardUrls = new Set<string>();
let repoCollectionNames = new Set<string>();

// --- Records staged for creation ----------------------------------------
let stagedCards: SembleCard[] = [];
let stagedCollections: SembleCollection[] = [];
let stagedLinks: SembleCollectionLink[] = [];
let draftRecords: ReturnType<typeof buildMockAtprotoRecords> | null = null;
43
// On mount: initialise the OAuth client and, when it yields a session
// (fresh login or restored), remember it and load the repo right away.
onMount(async () => {
  try {
    const init = await initOAuthClient();
    oauthInfo = getOAuthDefaults();
    if (!init.session) {
      return;
    }

    session = init.session;
    if (init.state) {
      statusMessage = "OAuth login complete.";
    } else {
      statusMessage = "OAuth session restored.";
    }
    await loadRepo();
  } catch (err) {
    authError = err instanceof Error ? err.message : String(err);
  }
});
57
/**
 * Start the OAuth sign-in flow for the handle typed into the form.
 *
 * Clears both notices first, then validates that a handle was entered and
 * that the client id / redirect URI are configured. Validation problems and
 * sign-in failures are reported through `authError`.
 */
async function startOAuth() {
  authError = "";
  statusMessage = "";

  const trimmedHandle = handle.trim();
  if (trimmedHandle === "") {
    authError = "Enter your handle first.";
    return;
  }

  const configured = Boolean(clientId) && Boolean(redirectUri);
  if (!configured) {
    authError = "Missing client metadata or redirect URI. Check your env config.";
    return;
  }

  try {
    await startOAuthSignIn(trimmedHandle);
  } catch (err) {
    authError = err instanceof Error ? err.message : String(err);
  }
}
75
/**
 * Sign out: revoke the current OAuth session (if any) and clear the local
 * session state.
 *
 * The local state is cleared even when revocation fails, so the UI never
 * stays "signed in" after the user asked to sign out; the failure is
 * reported through `authError` instead of surfacing as an unhandled rejection.
 */
async function signOut() {
  try {
    if (session) {
      await revokeOAuthSession(session);
    }
  } catch (error) {
    authError = error instanceof Error ? error.message : String(error);
  } finally {
    session = null;
    sessionHandle = "";
  }
}
83
/**
 * Read the markdown source, parse it into cards/collections and, when signed
 * in, stage the records that are new.
 *
 * The source URL is used when it is non-empty and passes `isProbablyUrl`;
 * otherwise the pasted text is parsed. All previously parsed and staged state
 * is reset first. Errors (including a failed fetch) are reported through
 * `authError`; `busy` is held for the duration of the work.
 */
async function fetchSource() {
  // Reset notices and everything derived from an earlier parse.
  statusMessage = "";
  authError = "";
  parsedCards = [];
  parsedCollections = [];
  stagedCards = [];
  stagedCollections = [];
  stagedLinks = [];
  draftRecords = null;

  const trimmedUrl = sourceUrl.trim();
  const hasUrl = trimmedUrl.length > 0 && isProbablyUrl(trimmedUrl);
  if (!hasUrl && !sourceText.trim()) {
    authError = "Provide a source URL or paste text.";
    return;
  }

  busy = true;
  try {
    let text: string;
    let sourceId = "manual-input";
    if (hasUrl) {
      const normalized = normalizeSourceUrl(trimmedUrl);
      sourceId = normalized;
      const response = await fetch(normalized);
      if (!response.ok) {
        throw new Error(`Source fetch failed: ${response.status}`);
      }
      text = await response.text();
    } else {
      text = sourceText;
    }

    const parsed = parseMarkdownSource(sourceId, text);
    parsedCards = parsed.cards;
    parsedCollections = parsed.collections;
    statusMessage = `Parsed ${parsedCards.length} cards.`;

    if (!session) {
      statusMessage = `Parsed ${parsedCards.length} cards. Sign in to stage new records.`;
      return;
    }
    if (parsedCards.length === 0) {
      // Nothing to stage. Calling stageRecords here would replace the parse
      // summary with "Parse a source first.", which is misleading right
      // after a successful parse.
      return;
    }

    if (!repoRecords) {
      await loadRepo(false);
    }
    await stageRecords(false);
  } catch (error) {
    authError = error instanceof Error ? error.message : String(error);
  } finally {
    busy = false;
  }
}
135
/**
 * Load the signed-in user's repo records, reporting failures via `authError`.
 *
 * @param withBusy Pass `false` when the caller already manages `busy`.
 *   Any other value keeps the default behaviour of toggling `busy` around the
 *   load. The parameter also accepts a DOM event because this function is
 *   bound directly as a click handler and therefore receives the click event
 *   as its first argument.
 */
async function loadRepo(withBusy: boolean | Event = true) {
  // Only an explicit `false` opts out; an event object means "called from the UI".
  const manageBusy = withBusy !== false;
  if (manageBusy) {
    busy = true;
  }
  try {
    await loadRepoInternal();
  } catch (error) {
    authError = error instanceof Error ? error.message : String(error);
  } finally {
    if (manageBusy) {
      busy = false;
    }
  }
}
150
/**
 * Fetch the existing Semble records of the signed-in repo and refresh the
 * derived lookup sets, the session handle and the status notice.
 *
 * Clears both notices first. Without a session it only sets `authError`.
 * Errors from the underlying calls propagate to the caller; `busy` is not
 * touched here.
 */
async function loadRepoInternal() {
  authError = "";
  statusMessage = "";

  if (session === null) {
    authError = "Sign in before loading your repo.";
    return;
  }

  const repo = session.sub;
  const agent = await getAgent(session);

  // Existing records plus the lookup sets used to skip already-known items.
  const listing = await listSembleRepoRecords({agent, repo});
  repoRecords = listing.records;
  repoCardUrls = extractRepoCardUrls(listing.records);
  repoCollectionNames = extractRepoCollectionNames(listing.records);

  // Resolve the handle shown next to the DID in the sign-in card.
  const repoInfo = await agent.com.atproto.repo.describeRepo({repo});
  sessionHandle = repoInfo.data.handle ?? "";

  const {cards, collections} = listing.summary;
  statusMessage = `Repo loaded: ${cards} cards, ${collections} collections.`;
}
169
/**
 * Work out which parsed cards and collections are new and build draft
 * records for them.
 *
 * Cards whose URL is already in `repoCardUrls` are skipped. The remaining
 * cards are enriched with metadata, linked to their collection, and only
 * collections that are both used by a new card and absent from
 * `repoCollectionNames` are staged. The repo is loaded first when it has not
 * been loaded yet. Problems are reported through `authError`.
 *
 * @param withBusy When true (default) `busy` is toggled around the work;
 *   pass false when the caller already manages it.
 */
async function stageRecords(withBusy = true) {
  authError = "";
  statusMessage = "";

  if (!parsedCards.length) {
    authError = "Parse a source first.";
    return;
  }
  if (!session) {
    // The repo itself is loaded on demand below; what is missing here is the sign-in.
    authError = "Sign in before staging records.";
    return;
  }

  if (withBusy) {
    busy = true;
  }
  try {
    if (!repoRecords) {
      await loadRepoInternal();
    }
    const newCards = parsedCards.filter(card => !repoCardUrls.has(card.url));
    await enrichMetadata(newCards);

    // One link per new card that sits under a collection heading.
    const collectionLinks: SembleCollectionLink[] = [];
    for (const card of newCards) {
      if (card.collection) {
        collectionLinks.push({collectionName: card.collection, cardId: card.id});
      }
    }

    // Stage only collections that a new card uses and the repo does not have yet.
    const usedCollectionNames = new Set(collectionLinks.map(link => link.collectionName));
    const collectionList = parsedCollections.filter(
      collection =>
        usedCollectionNames.has(collection.name) && !repoCollectionNames.has(collection.name)
    );

    stagedCards = newCards;
    stagedCollections = collectionList;
    stagedLinks = collectionLinks;

    draftRecords = buildMockAtprotoRecords({
      cards: stagedCards,
      collections: stagedCollections,
      collectionLinks: stagedLinks,
      did: session.sub
    });

    statusMessage = `Staged ${stagedCards.length} cards, ${stagedCollections.length} collections.`;
  } catch (error) {
    authError = error instanceof Error ? error.message : String(error);
  } finally {
    if (withBusy) {
      busy = false;
    }
  }
}
231
/**
 * Write the staged draft records to the signed-in repo.
 *
 * Requires both a session and staged drafts. The outcome is reported as a
 * status notice on full success, or through `authError` when some records
 * failed or the call threw. `busy` is held while the request runs.
 */
async function createRecords() {
  authError = "";
  statusMessage = "";

  if (!(session && draftRecords)) {
    authError = "Stage records before creating.";
    return;
  }

  busy = true;
  try {
    const agent = await getAgent(session);
    const outcome = await createSembleRecords(
      agent,
      session.sub,
      draftRecords,
      repoRecords ?? undefined
    );
    const failedCount = outcome.failures.length;
    if (failedCount > 0) {
      authError = `Created ${outcome.created} records, ${failedCount} failed.`;
    } else {
      statusMessage = `Created ${outcome.created} records in Semble.`;
    }
  } catch (err) {
    authError = err instanceof Error ? err.message : String(err);
  } finally {
    busy = false;
  }
}
255
/**
 * Fill in missing titles/metadata on the given cards (mutating them in place)
 * and apply URL classification to every card.
 *
 * A Citoid lookup is made, one card at a time, only for cards that lack a
 * title or metadata. Existing card values win when metadata is merged, and a
 * looked-up title is used only when the card has none.
 */
async function enrichMetadata(cards: SembleCard[]) {
  for (const card of cards) {
    const needsLookup = !card.title || !card.metadata;
    if (needsLookup) {
      const lookup = await fetchCitoidMetadata(card.url);
      if (lookup) {
        if (lookup.metadata) {
          card.metadata = mergeMetadata(card.metadata, lookup.metadata);
        }
        if (lookup.title && !card.title) {
          card.title = lookup.title;
        }
        if (lookup.raw) {
          console.debug("Citoid response", {url: card.url, raw: lookup.raw});
        }
      }
    }

    applyUrlClassification(card);
  }
}
274
/**
 * Look up citation metadata for a URL via the Wikipedia Citoid REST endpoint.
 *
 * The lookup is best-effort: a non-OK response, a network failure, an
 * unparsable body or a payload the mapper rejects all yield `undefined`, so
 * one bad URL cannot abort the enrichment of the remaining cards.
 *
 * @param url The card URL to look up.
 * @returns The trimmed title, mapped metadata and raw payload, or `undefined`
 *   when nothing usable was obtained.
 */
async function fetchCitoidMetadata(
  url: string
): Promise<{title?: string; metadata?: SembleUrlMetadata; raw?: unknown} | undefined> {
  const encoded = encodeURIComponent(url);
  try {
    const response = await fetch(`https://en.wikipedia.org/api/rest_v1/data/citation/zotero/${encoded}`);
    if (!response.ok) return undefined;
    const data = (await response.json()) as unknown;
    const record = mapCitoidPayloadToSembleRecord(data);
    if (!record) return undefined;

    return {
      title: record.metadata.title?.trim(),
      metadata: record.metadata,
      raw: record.raw
    };
  } catch (error) {
    // fetch rejects on network errors and json() on malformed bodies; treat
    // both like "no metadata available" but leave a trace for debugging.
    console.warn("Citoid lookup failed", {url, error});
    return undefined;
  }
}
291
/**
 * Combine looked-up metadata with what a card already has.
 *
 * @param existing Metadata already on the card, if any.
 * @param incoming Newly fetched metadata.
 * @returns `incoming` itself when there is no existing metadata; otherwise a
 *   new object in which fields from `existing` override those of `incoming`.
 */
function mergeMetadata(
  existing: SembleUrlMetadata | undefined,
  incoming: SembleUrlMetadata
): SembleUrlMetadata {
  if (existing) {
    // Spread order matters: later (existing) fields win.
    const merged: SembleUrlMetadata = {...incoming, ...existing};
    return merged;
  }
  return incoming;
}
302
/**
 * Set `card.metadata.type` from the URL classifier, creating the metadata
 * object when the card has none. Cards the classifier returns nothing for
 * are left untouched.
 */
function applyUrlClassification(card: SembleCard): void {
  const urlType = UrlClassifier.classifyUrl(card.url);
  if (urlType) {
    if (card.metadata) {
      card.metadata.type = urlType;
    } else {
      card.metadata = {type: urlType};
    }
  }
}
313
/**
 * Collect the URLs of all card records in the repo.
 * Records from which no non-empty URL can be extracted are skipped.
 */
function extractRepoCardUrls(existingRepo: RepoRecordGroups): Set<string> {
  const found = existingRepo.cards
    .map(entry => extractRepoUrl(entry.value))
    .filter((url): url is string => Boolean(url));
  return new Set<string>(found);
}
322
/**
 * Collect the names of all collection records in the repo.
 * Records without a non-empty string name are skipped.
 */
function extractRepoCollectionNames(existingRepo: RepoRecordGroups): Set<string> {
  const found = existingRepo.collections
    .map(entry => extractRepoName(entry.value))
    .filter((name): name is string => Boolean(name));
  return new Set<string>(found);
}
331
/**
 * Pull a URL out of a raw repo record value.
 *
 * A non-empty string at `value.url` wins; otherwise a string at
 * `value.content.url` is returned as-is. Anything else yields `undefined`.
 */
function extractRepoUrl(value: unknown): string | undefined {
  if (value === null || typeof value !== "object") {
    return undefined;
  }

  const candidate = value as {url?: unknown; content?: {url?: unknown}};

  const topLevel = candidate.url;
  if (typeof topLevel === "string" && topLevel !== "") {
    return topLevel;
  }

  const inner = candidate.content;
  const hasInnerUrl = Boolean(inner) && typeof inner === "object" && typeof inner.url === "string";
  return hasInnerUrl ? (inner.url as string) : undefined;
}
343
/**
 * Read the `name` field of a raw repo record value.
 * Returns it only when the value is an object and `name` is a string.
 */
function extractRepoName(value: unknown): string | undefined {
  if (value !== null && typeof value === "object") {
    const candidate = (value as {name?: unknown}).name;
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return undefined;
}
349
/**
 * Create an ATProto agent for the given OAuth session.
 * The `@atproto/api` module is imported dynamically at call time.
 */
async function getAgent(session: OAuthSession) {
  const atprotoApi = await import("@atproto/api");
  return new atprotoApi.Agent(session);
}
354
// ---- Derived state (Svelte reactive declarations) -----------------------

// Sign-in button is enabled only with a handle and complete OAuth config.
$: oauthReady = Boolean(handle.trim() && clientId && redirectUri);
// The staged-records section is shown once anything is staged.
$: stagedReady = stagedCards.length > 0 || stagedCollections.length > 0;

// Existing repo cards, with the URL extracted for display; capped for the list.
$: repoCardList = repoRecords
  ? repoRecords.cards.map(entry => ({
      uri: entry.uri,
      url: extractRepoUrl(entry.value),
      value: entry.value
    }))
  : [];
$: repoCardListVisible = repoCardList.slice(0, repoListLimit);
$: repoCardOverflow = Math.max(0, repoCardList.length - repoListLimit);

// Lookups used to group staged cards by collection.
$: stagedCollectionNames = new Set(stagedCollections.map(collection => collection.name));
$: parsedCollectionsByName = new Map(parsedCollections.map(collection => [collection.name, collection]));
$: stagedCardsWithoutCollection = stagedCards.filter(card => !card.collection);

// Collection names in display order: parsed collections first, then any
// names that appear only on staged cards. A Set keeps first-occurrence order
// while de-duplicating in linear time.
$: stagedCollectionOrder = Array.from(
  new Set([
    ...parsedCollections.map(collection => collection.name),
    ...stagedCards
      .map(card => card.collection)
      .filter((name): name is string => Boolean(name))
  ])
);

// One group per collection that has at least one staged card.
$: stagedCollectionGroups = stagedCollectionOrder
  .map(name => ({
    name,
    description: parsedCollectionsByName.get(name)?.description,
    isNew: stagedCollectionNames.has(name),
    cards: stagedCards.filter(card => card.collection === name)
  }))
  .filter(group => group.cards.length > 0);

// Draft records indexed for lookup while rendering each staged card.
$: draftCardById = new Map(
  draftRecords?.cards.map(record => [record.recordId, record]) ?? []
);
$: draftNoteByCardId = new Map(
  draftRecords?.notes
    .map(record => (record.cardId ? [record.cardId, record] : null))
    .filter(
      // NonNullable: draftRecords is declared nullable, so its type cannot be indexed directly.
      (entry): entry is [string, NonNullable<typeof draftRecords>["notes"][number]] => entry !== null
    ) ?? []
);
393</script>
394
395<div class="app-shell fade-in">
396 <section class="hero">
397 <div class="brand">
398 <img class="brand-logo" src="/md2semble.png" alt="Markdown Semble Importer logo" />
399 </div>
400 <h1>Markdown → Semble Importer</h1>
401 </section>
402
403 <section class="stack">
404 <div class="card">
405 <h2>Connect with ATProto</h2>
406 {#if session}
407 <div class="notice">
408 Signed in as {session.sub}{sessionHandle ? ` (${sessionHandle})` : ""}.
409 </div>
410 <button class="secondary" on:click={signOut}>Sign out</button>
411 {:else}
412 <div class="field">
413 <label for="handle">Handle</label>
414 <input id="handle" bind:value={handle} placeholder="alice.bsky.social" />
415 </div>
416 <button on:click={startOAuth} disabled={!oauthReady || busy}>Start OAuth login</button>
417 {/if}
418 </div>
419
420 <div class="card">
421 <h2>Markdown source</h2>
422 <p>
423 Use headings for collections and list items for cards. Add a note with <code> : </code> after
424 the URL.
425 </p>
426 <pre>
427# Control
428- https://arxiv.org/abs/1234.5678 : Classic paper
429- [A neat blog](https://example.com) : read later
430 </pre>
431 <div class="field">
432 <label for="sourceUrl">Source URL</label>
433 <input id="sourceUrl" bind:value={sourceUrl} placeholder="https://gist.github.com/..." />
434 </div>
435 <div class="field">
436 <label for="sourceText">Or paste markdown</label>
437 <textarea id="sourceText" bind:value={sourceText}></textarea>
438 </div>
439 <button class="secondary" on:click={fetchSource} disabled={busy}>Parse source</button>
440 {#if parsedCards.length > 0}
441 <div class="notice">
442 Parsed {parsedCards.length} cards and {parsedCollections.length} collections.
443 </div>
444 {/if}
445 </div>
446
447 <div class="card">
448 <h2>Load Semble repo</h2>
449 <p>Fetch existing records so we only create new URLs.</p>
450 <button class="secondary" on:click={loadRepo} disabled={!session || busy}>
451 {repoRecords ? "Reload repo" : "Load repo"}
452 </button>
453 {#if repoRecords}
454 <div class="notice">
455 Repo has {repoCardUrls.size} cards, {repoCollectionNames.size} collections.
456 </div>
457 <details class="details">
458 <summary>Show existing records</summary>
459 <div class="list">
460 {#if repoCardListVisible.length === 0}
461 <div class="list-item">No cards found in the repo.</div>
462 {:else}
463 {#each repoCardListVisible as entry}
464 <div class="list-item">
465 {#if entry.url}
466 <strong>{entry.url}</strong>
467 {/if}
468 <span>{entry.uri}</span>
469 <pre class="code-block">{JSON.stringify(entry.value, null, 2)}</pre>
470 </div>
471 {/each}
472 {/if}
473 {#if repoCardOverflow > 0}
474 <div class="list-item">And {repoCardOverflow} more…</div>
475 {/if}
476 </div>
477 </details>
478 {/if}
479 </div>
480
481 </section>
482
483 {#if authError}
484 <div class="notice">{authError}</div>
485 {/if}
486 {#if statusMessage}
487 <div class="notice">{statusMessage}</div>
488 {/if}
489
490 {#if stagedReady}
491 <section class="stack">
492 <div class="card">
493 <h2>Staged records</h2>
494 <div class="list">
495 {#if stagedCardsWithoutCollection.length > 0}
496 <div class="list-item">
497 <strong>Unsorted</strong>
498 <span>Cards without a collection heading.</span>
499 </div>
500 {#each stagedCardsWithoutCollection as card}
501 {@const cardDraft = draftCardById.get(card.id)}
502 {@const noteDraft = draftNoteByCardId.get(card.id)}
503 <div class="list-item">
504 <strong>{card.title ?? card.url}</strong>
505 <span>{card.url}</span>
506 {#if cardDraft}
507 <pre class="code-block">{JSON.stringify(cardDraft.record, null, 2)}</pre>
508 {:else}
509 <pre class="code-block">{JSON.stringify({error: "Missing card record."}, null, 2)}</pre>
510 {/if}
511 {#if noteDraft}
512 <pre class="code-block">{JSON.stringify(noteDraft.record, null, 2)}</pre>
513 {/if}
514 </div>
515 {/each}
516 {/if}
517 {#each stagedCollectionGroups as group}
518 <div class="list-item">
519 <strong>{group.name}</strong>
520 {#if group.isNew}
521 <span class="badge">New collection</span>
522 {/if}
523 {#if group.description}
524 <span>{group.description}</span>
525 {/if}
526 </div>
527 {#each group.cards as card}
528 {@const cardDraft = draftCardById.get(card.id)}
529 {@const noteDraft = draftNoteByCardId.get(card.id)}
530 <div class="list-item">
531 <strong>{card.title ?? card.url}</strong>
532 <span>{card.url}</span>
533 {#if cardDraft}
534 <pre class="code-block">{JSON.stringify(cardDraft.record, null, 2)}</pre>
535 {:else}
536 <pre class="code-block">{JSON.stringify({error: "Missing card record."}, null, 2)}</pre>
537 {/if}
538 {#if noteDraft}
539 <pre class="code-block">{JSON.stringify(noteDraft.record, null, 2)}</pre>
540 {/if}
541 </div>
542 {/each}
543 {/each}
544 </div>
545 <button class="secondary" on:click={createRecords} disabled={!stagedReady || busy}>
546 Create records
547 </button>
548 <div class="notice">
549 Ready to create {stagedCards.length} cards, {stagedCollections.length} collections,{" "}
550 {stagedLinks.length} links.
551 </div>
552 </div>
553 </section>
554 {/if}
555</div>