forked from
mary.my.id/atcute
a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky
1import type { CidLink } from '@atcute/cid';
2
3import type { NodeStore } from './node-store.ts';
4import { NodeWalker } from './node-walker.ts';
5
6/**
7 * Type of change to a record
8 */
9export enum DeltaType {
10 CREATED = 1,
11 UPDATED = 2,
12 DELETED = 3,
13}
14
15/**
16 * Represents a change to a single record
17 */
18export interface RecordDelta {
19 /** type of change */
20 deltaType: DeltaType;
21 /** record path (collection/rkey) */
22 path: string;
23 /** CID before the change (null for creates) */
24 priorValue: CidLink | null;
25 /** CID after the change (null for deletes) */
26 laterValue: CidLink | null;
27}
28
29/**
30 * Given two sets of MST nodes, returns an iterator of record-level changes
31 * @param ns the node store
32 * @param created set of node CIDs that were created
33 * @param deleted set of node CIDs that were deleted
34 * @yields record deltas describing the changes
35 */
36export async function* recordDiff(
37 ns: NodeStore,
38 created: Set<string>,
39 deleted: Set<string>,
40): AsyncGenerator<RecordDelta> {
41 // Build maps of all keys and values in created/deleted nodes
42 const createdKv = new Map<string, CidLink>();
43 for (const cid of created) {
44 const node = await ns.get(cid);
45 for (let i = 0; i < node.keys.length; i++) {
46 createdKv.set(node.keys[i], node.values[i]);
47 }
48 }
49
50 const deletedKv = new Map<string, CidLink>();
51 for (const cid of deleted) {
52 const node = await ns.get(cid);
53 for (let i = 0; i < node.keys.length; i++) {
54 deletedKv.set(node.keys[i], node.values[i]);
55 }
56 }
57
58 // Find keys that were created (in created but not deleted)
59 for (const [key, value] of createdKv) {
60 if (!deletedKv.has(key)) {
61 yield {
62 deltaType: DeltaType.CREATED,
63 path: key,
64 priorValue: null,
65 laterValue: value,
66 };
67 }
68 }
69
70 // Find keys that were updated (in both, but with different values)
71 for (const [key, newValue] of createdKv) {
72 const oldValue = deletedKv.get(key);
73 if (oldValue !== undefined && oldValue.$link !== newValue.$link) {
74 yield {
75 deltaType: DeltaType.UPDATED,
76 path: key,
77 priorValue: oldValue,
78 laterValue: newValue,
79 };
80 }
81 }
82
83 // Find keys that were deleted (in deleted but not created)
84 for (const [key, value] of deletedKv) {
85 if (!createdKv.has(key)) {
86 yield {
87 deltaType: DeltaType.DELETED,
88 path: key,
89 priorValue: value,
90 laterValue: null,
91 };
92 }
93 }
94}
95
96/**
97 * Slow but obvious MST diff implementation for testing
98 * Enumerates all nodes in both trees and compares them
99 * @param ns the node store
100 * @param rootA CID of first MST root
101 * @param rootB CID of second MST root
102 * @returns tuple of [created nodes, deleted nodes]
103 */
104export const verySlowMstDiff = async (
105 ns: NodeStore,
106 rootA: string,
107 rootB: string,
108): Promise<[Set<string>, Set<string>]> => {
109 const walkerA = await NodeWalker.create(ns, rootA);
110 const walkerB = await NodeWalker.create(ns, rootB);
111
112 const nodesA = new Set<string>();
113 for await (const cid of iterNodeCids(walkerA)) {
114 nodesA.add(cid);
115 }
116
117 const nodesB = new Set<string>();
118 for await (const cid of iterNodeCids(walkerB)) {
119 nodesB.add(cid);
120 }
121
122 const created = new Set<string>();
123 for (const cid of nodesB) {
124 if (!nodesA.has(cid)) {
125 created.add(cid);
126 }
127 }
128
129 const deleted = new Set<string>();
130 for (const cid of nodesA) {
131 if (!nodesB.has(cid)) {
132 deleted.add(cid);
133 }
134 }
135
136 return [created, deleted];
137};
138
139/**
140 * Helper to iterate over all node CIDs in a tree
141 */
142async function* iterNodeCids(walker: NodeWalker): AsyncGenerator<string> {
143 // Always yield the current node
144 yield (await walker.frame.node.cid()).$link;
145
146 // Recursively iterate through the tree
147 while (!walker.done) {
148 if (walker.subtree !== null) {
149 await walker.down();
150 yield (await walker.frame.node.cid()).$link;
151 } else {
152 walker.rightOrUp();
153 }
154 }
155}
156
157// precomputed from `(await MSTNode.empty().cid()).$link`
158const EMPTY_NODE_CID = 'bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm';
159
160/**
161 * Efficiently computes the difference between two MSTs
162 * @param ns the node store
163 * @param rootA CID of first MST root
164 * @param rootB CID of second MST root
165 * @returns tuple of [created nodes, deleted nodes]
166 */
167export const mstDiff = async (
168 ns: NodeStore,
169 rootA: string,
170 rootB: string,
171): Promise<[Set<string>, Set<string>]> => {
172 const created = new Set<string>(); // nodes in B but not in A
173 const deleted = new Set<string>(); // nodes in A but not in B
174
175 const walkerA = await NodeWalker.create(ns, rootA);
176 const walkerB = await NodeWalker.create(ns, rootB);
177
178 await mstDiffRecursive(created, deleted, walkerA, walkerB);
179
180 // Remove false positives (nodes that appeared in both sets)
181 const middle = new Set<string>();
182 for (const cid of created) {
183 if (deleted.has(cid)) {
184 middle.add(cid);
185 }
186 }
187
188 for (const cid of middle) {
189 created.delete(cid);
190 deleted.delete(cid);
191 }
192
193 // Special case: if one of the root nodes was empty
194 if (rootA === EMPTY_NODE_CID && rootB !== EMPTY_NODE_CID) {
195 deleted.add(EMPTY_NODE_CID);
196 }
197 if (rootB === EMPTY_NODE_CID && rootA !== EMPTY_NODE_CID) {
198 created.add(EMPTY_NODE_CID);
199 }
200
201 return [created, deleted];
202};
203
204/**
205 * Recursive helper for mstDiff
206 * Theory: most trees that get compared will have lots of shared blocks (which we can skip over)
207 * Completely different trees will inevitably have to visit every node.
208 */
209const mstDiffRecursive = async (
210 created: Set<string>,
211 deleted: Set<string>,
212 a: NodeWalker,
213 b: NodeWalker,
214): Promise<void> => {
215 // Easiest case: nodes are identical
216 const aNodeCid = (await a.frame.node.cid()).$link;
217 const bNodeCid = (await b.frame.node.cid()).$link;
218
219 if (aNodeCid === bNodeCid) {
220 return; // no difference
221 }
222
223 // Trivial case: a is empty, all of b is new
224 if (a.frame.node.isEmpty) {
225 for await (const cid of iterNodeCids(b)) {
226 created.add(cid);
227 }
228 return;
229 }
230
231 // Likewise: b is empty, all of a is deleted
232 if (b.frame.node.isEmpty) {
233 for await (const cid of iterNodeCids(a)) {
234 deleted.add(cid);
235 }
236 return;
237 }
238
239 // Now we're onto the hard part
240 // NB: these will end up as false-positives if one tree is a subtree of the other
241 created.add(bNodeCid);
242 deleted.add(aNodeCid);
243
244 // General idea:
245 // 1. If one cursor is "behind" the other, catch it up
246 // 2. When we're matched up, skip over identical subtrees (and recursively diff non-identical subtrees)
247 while (true) {
248 // Catch up whichever cursor is behind
249 while (a.rpath !== b.rpath) {
250 // Catch up cursor a if it's behind
251 while (a.rpath < b.rpath && !a.done) {
252 if (a.subtree !== null) {
253 await a.down();
254 deleted.add((await a.frame.node.cid()).$link);
255 } else {
256 a.rightOrUp();
257 }
258 }
259
260 // Catch up cursor b likewise
261 while (b.rpath < a.rpath && !b.done) {
262 if (b.subtree !== null) {
263 await b.down();
264 created.add((await b.frame.node.cid()).$link);
265 } else {
266 b.rightOrUp();
267 }
268 }
269 }
270
271 // The rpaths now match, but the subtrees below us might not
272 // Recursively diff the subtrees
273 const aSubWalker = await a.createSubtreeWalker();
274 const bSubWalker = await b.createSubtreeWalker();
275 await mstDiffRecursive(created, deleted, aSubWalker, bSubWalker);
276
277 // Check if we can still go right
278 const aBottom = a.stack.peekBottom();
279 const bBottom = b.stack.peekBottom();
280
281 if (
282 aBottom !== undefined &&
283 bBottom !== undefined &&
284 a.rpath === aBottom.rpath &&
285 b.rpath === bBottom.rpath
286 ) {
287 break;
288 }
289
290 a.rightOrUp();
291 b.rightOrUp();
292 }
293};