/**
 * Utilities for deduplicating records that may exist under both
 * pub.leaflet.* and site.standard.* namespaces.
 *
 * After the migration to site.standard.*, records can exist in both namespaces
 * with the same DID and rkey. This utility deduplicates them, preferring
 * site.standard.* records when available.
 */
9+10+import { AtUri } from "@atproto/syntax";
/**
 * Builds the namespace-independent identity key of a record from its AT URI.
 *
 * The key joins the URI's host (authority) and its record key with a slash.
 * The collection NSID is deliberately left out, so the pub.leaflet.* and
 * site.standard.* copies of one record map to the same key.
 *
 * @param uri - AT URI of the record
 * @returns `"<host>/<rkey>"`, or `null` if parsing the URI with `AtUri` throws
 *
 * @example
 * getRecordIdentityKey("at://did:plc:abc/pub.leaflet.document/3abc")
 * // => "did:plc:abc/3abc"
 *
 * getRecordIdentityKey("at://did:plc:abc/site.standard.document/3abc")
 * // => "did:plc:abc/3abc" (identical key, different namespace)
 */
function getRecordIdentityKey(uri: string): string | null {
  try {
    // Both parts are read inside the try so any failure maps to `null`.
    const { host, rkey } = new AtUri(uri);
    return `${host}/${rkey}`;
  } catch {
    return null;
  }
}
/**
 * Reports whether an AT URI addresses a record in a site.standard.* collection.
 *
 * Only the collection segment (the first path segment after the authority) is
 * inspected. A plain substring search for "/site.standard." would also match
 * an authority that starts with "site.standard." (through the second slash of
 * "at://") or a record key that starts with it.
 *
 * @param uri - AT URI, with or without the leading "at://"
 * @returns `true` when the collection segment starts with "site.standard."
 */
function isSiteStandardUri(uri: string): boolean {
  const scheme = "at://";
  const withoutScheme = uri.startsWith(scheme) ? uri.slice(scheme.length) : uri;
  // Segments are [authority, collection, rkey, ...]; collection may be absent.
  const collection = withoutScheme.split("/")[1] ?? "";
  return collection.startsWith("site.standard.");
}
/**
 * Deduplicates an array of records that have a `uri` property.
 *
 * When records exist under both pub.leaflet.* and site.standard.* namespaces
 * (same DID and rkey), only one of them is kept, and the site.standard version
 * wins. The surviving record sits at the position of the first occurrence of
 * its identity key.
 *
 * Records whose URI cannot be turned into an identity key are never treated as
 * duplicates: they are passed through unchanged, in their original position,
 * rather than being dropped.
 *
 * @param records - Array of records with a `uri` property (not mutated)
 * @returns New deduplicated array, preferring site.standard records
 *
 * @example
 * const docs = [
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * ];
 * const deduped = deduplicateByUri(docs);
 * // Returns: [
 * //   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 * //   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * // ]
 */
export function deduplicateByUri<T extends { uri: string }>(records: T[]): T[] {
  const kept: T[] = [];
  // Identity key -> index in `kept` of the record currently chosen for that key.
  const slotByKey = new Map<string, number>();

  for (const record of records) {
    const key = getRecordIdentityKey(record.uri);
    if (key === null) {
      // Invalid URI: nothing to compare against, so keep the record as-is.
      kept.push(record);
      continue;
    }

    const slot = slotByKey.get(key);
    if (slot === undefined) {
      slotByKey.set(key, kept.length);
      kept.push(record);
      continue;
    }

    // Duplicate key: only a site.standard record may displace a non-site.standard
    // one. Same-namespace duplicates keep the first record seen.
    const current = kept[slot];
    if (
      current !== undefined &&
      isSiteStandardUri(record.uri) &&
      !isSiteStandardUri(current.uri)
    ) {
      kept[slot] = record;
    }
  }

  return kept;
}
/**
 * Deduplicates records while preserving the original order based on the first
 * occurrence of each unique record.
 *
 * Records sharing an identity key (same DID and rkey across the pub.leaflet.*
 * and site.standard.* namespaces) collapse into one entry placed where the
 * first of them appeared. If a later duplicate is a site.standard record and
 * the chosen one is not, the site.standard record takes over that position.
 *
 * Records whose URI cannot be turned into an identity key are not dropped:
 * they stay in the output at their original relative position.
 *
 * @param records - Array of records with a `uri` property (not mutated)
 * @returns New deduplicated array in original order, preferring site.standard records
 */
export function deduplicateByUriOrdered<T extends { uri: string }>(
  records: T[]
): T[] {
  // Output is built in input order, so no index bookkeeping or sort is needed.
  const ordered: T[] = [];
  // Identity key -> position in `ordered` reserved by its first occurrence.
  const positionByKey = new Map<string, number>();

  for (const record of records) {
    const key = getRecordIdentityKey(record.uri);
    const position = key === null ? undefined : positionByKey.get(key);

    if (key === null || position === undefined) {
      // Either the URI is unparseable (kept verbatim) or the key is new.
      if (key !== null) {
        positionByKey.set(key, ordered.length);
      }
      ordered.push(record);
      continue;
    }

    // Replace with site.standard but keep the original position.
    const current = ordered[position];
    if (
      current !== undefined &&
      isSiteStandardUri(record.uri) &&
      !isSiteStandardUri(current.uri)
    ) {
      ordered[position] = record;
    }
  }

  return ordered;
}