/**
 * Utilities for deduplicating records that may exist under both
 * pub.leaflet.* and site.standard.* namespaces.
 *
 * After the migration to site.standard.*, records can exist in both namespaces
 * with the same DID and rkey. This utility deduplicates them, preferring
 * site.standard.* records when available.
 */
99+1010+import { AtUri } from "@atproto/syntax";
/**
 * Builds a namespace-independent identity key for a record from its AT URI.
 *
 * The key is the URI's host joined to its record key with a slash; the
 * collection segment is deliberately left out, so the same record stored
 * under two different namespaces yields the same key.
 *
 * @example
 * getRecordIdentityKey("at://did:plc:abc/pub.leaflet.document/3abc")
 * // Returns: "did:plc:abc/3abc"
 *
 * getRecordIdentityKey("at://did:plc:abc/site.standard.document/3abc")
 * // Returns: "did:plc:abc/3abc" (same key, different namespace)
 *
 * @param uri - The AT URI string to derive the key from
 * @returns The `host/rkey` key, or `null` if parsing the URI throws
 */
function getRecordIdentityKey(uri: string): string | null {
  try {
    // Property reads stay inside the try so any failure maps to null.
    const { host, rkey } = new AtUri(uri);
    return host + "/" + rkey;
  } catch {
    return null;
  }
}
// Matches an AT URI (scheme optional) whose collection segment, i.e. the first
// path segment right after the authority, starts with "site.standard.".
const SITE_STANDARD_COLLECTION_RE = /^(?:at:\/\/)?[^/]+\/site\.standard\./;

/**
 * Checks if a URI's collection is in the site.standard namespace.
 *
 * Only the collection segment (immediately after the authority) is inspected.
 * A plain substring search would also match a record key or authority that
 * happens to begin with "site.standard.", misclassifying records that live
 * in another namespace.
 *
 * @param uri - The AT URI string to inspect
 * @returns `true` when the collection segment begins with "site.standard."
 */
function isSiteStandardUri(uri: string): boolean {
  return SITE_STANDARD_COLLECTION_RE.test(uri);
}
/**
 * Deduplicates an array of records that have a `uri` property.
 *
 * When records exist under both pub.leaflet.* and site.standard.* namespaces
 * (same DID and rkey), this function keeps only the site.standard version.
 * If several records share an identity key and none (or more than one) is a
 * site.standard record, the earliest one that qualifies is kept.
 *
 * Records whose URI cannot be parsed are never treated as duplicates: each
 * one is passed through unchanged. The output follows the order in which
 * each identity first appeared in the input.
 *
 * @param records - Array of records with a `uri` property
 * @returns Deduplicated array, preferring site.standard records
 *
 * @example
 * const docs = [
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * ];
 * const deduped = deduplicateByUri(docs);
 * // Returns: [
 * //   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 * //   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * // ]
 */
export function deduplicateByUri<T extends { uri: string }>(records: T[]): T[] {
  // A Map iterates in insertion order, and re-setting an existing key keeps
  // that key's original position, so the result order tracks first occurrence.
  const recordsByKey = new Map<string | symbol, T>();

  for (const record of records) {
    const key = getRecordIdentityKey(record.uri);
    if (key === null) {
      // Invalid URI: keep the record as-is. A fresh symbol key can never
      // collide, so the record is retained without joining deduplication.
      recordsByKey.set(Symbol("unparseable-uri"), record);
      continue;
    }

    const existing = recordsByKey.get(key);
    if (existing === undefined) {
      recordsByKey.set(key, record);
    } else if (
      isSiteStandardUri(record.uri) &&
      !isSiteStandardUri(existing.uri)
    ) {
      // Prefer site.standard records over pub.leaflet records.
      recordsByKey.set(key, record);
    }
    // Otherwise both are in the same namespace or the kept record is already
    // site.standard, so the existing entry stays.
  }

  return Array.from(recordsByKey.values());
}
/**
 * Deduplicates records while preserving the original order based on the first
 * occurrence of each unique record.
 *
 * Records sharing an identity key (DID + rkey) collapse to a single entry
 * placed where the first of them appeared. If a later duplicate is a
 * site.standard record and the kept one is not, the later record takes over
 * that position.
 *
 * Records whose URI cannot be parsed are never treated as duplicates: each
 * one is kept, in its original relative position.
 *
 * @param records - Array of records with a `uri` property
 * @returns Deduplicated array in original order, preferring site.standard records
 */
export function deduplicateByUriOrdered<T extends { uri: string }>(
  records: T[]
): T[] {
  const output: T[] = [];
  // Maps each identity key to the index in `output` that holds its record.
  const slotByKey = new Map<string, number>();

  for (const record of records) {
    const key = getRecordIdentityKey(record.uri);
    if (key === null) {
      // Unparseable URI: pass the record through rather than dropping it.
      output.push(record);
      continue;
    }

    const slot = slotByKey.get(key);
    if (slot === undefined) {
      slotByKey.set(key, output.length);
      output.push(record);
      continue;
    }

    const kept = output[slot];
    if (
      kept !== undefined &&
      isSiteStandardUri(record.uri) &&
      !isSiteStandardUri(kept.uri)
    ) {
      // Replace with the site.standard record but keep the original position.
      output[slot] = record;
    }
  }

  // Built in first-occurrence order, so no sort is needed.
  return output;
}