a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky
at trunk 293 lines 7.4 kB view raw
1import type { CidLink } from '@atcute/cid'; 2 3import type { NodeStore } from './node-store.ts'; 4import { NodeWalker } from './node-walker.ts'; 5 6/** 7 * Type of change to a record 8 */ 9export enum DeltaType { 10 CREATED = 1, 11 UPDATED = 2, 12 DELETED = 3, 13} 14 15/** 16 * Represents a change to a single record 17 */ 18export interface RecordDelta { 19 /** type of change */ 20 deltaType: DeltaType; 21 /** record path (collection/rkey) */ 22 path: string; 23 /** CID before the change (null for creates) */ 24 priorValue: CidLink | null; 25 /** CID after the change (null for deletes) */ 26 laterValue: CidLink | null; 27} 28 29/** 30 * Given two sets of MST nodes, returns an iterator of record-level changes 31 * @param ns the node store 32 * @param created set of node CIDs that were created 33 * @param deleted set of node CIDs that were deleted 34 * @yields record deltas describing the changes 35 */ 36export async function* recordDiff( 37 ns: NodeStore, 38 created: Set<string>, 39 deleted: Set<string>, 40): AsyncGenerator<RecordDelta> { 41 // Build maps of all keys and values in created/deleted nodes 42 const createdKv = new Map<string, CidLink>(); 43 for (const cid of created) { 44 const node = await ns.get(cid); 45 for (let i = 0; i < node.keys.length; i++) { 46 createdKv.set(node.keys[i], node.values[i]); 47 } 48 } 49 50 const deletedKv = new Map<string, CidLink>(); 51 for (const cid of deleted) { 52 const node = await ns.get(cid); 53 for (let i = 0; i < node.keys.length; i++) { 54 deletedKv.set(node.keys[i], node.values[i]); 55 } 56 } 57 58 // Find keys that were created (in created but not deleted) 59 for (const [key, value] of createdKv) { 60 if (!deletedKv.has(key)) { 61 yield { 62 deltaType: DeltaType.CREATED, 63 path: key, 64 priorValue: null, 65 laterValue: value, 66 }; 67 } 68 } 69 70 // Find keys that were updated (in both, but with different values) 71 for (const [key, newValue] of createdKv) { 72 const oldValue = deletedKv.get(key); 73 if (oldValue !== undefined && oldValue.$link !== newValue.$link) { 74 yield { 75 deltaType: DeltaType.UPDATED, 76 path: key, 77 priorValue: oldValue, 78 laterValue: newValue, 79 }; 80 } 81 } 82 83 // Find keys that were deleted (in deleted but not created) 84 for (const [key, value] of deletedKv) { 85 if (!createdKv.has(key)) { 86 yield { 87 deltaType: DeltaType.DELETED, 88 path: key, 89 priorValue: value, 90 laterValue: null, 91 }; 92 } 93 } 94} 95 96/** 97 * Slow but obvious MST diff implementation for testing 98 * Enumerates all nodes in both trees and compares them 99 * @param ns the node store 100 * @param rootA CID of first MST root 101 * @param rootB CID of second MST root 102 * @returns tuple of [created nodes, deleted nodes] 103 */ 104export const verySlowMstDiff = async ( 105 ns: NodeStore, 106 rootA: string, 107 rootB: string, 108): Promise<[Set<string>, Set<string>]> => { 109 const walkerA = await NodeWalker.create(ns, rootA); 110 const walkerB = await NodeWalker.create(ns, rootB); 111 112 const nodesA = new Set<string>(); 113 for await (const cid of iterNodeCids(walkerA)) { 114 nodesA.add(cid); 115 } 116 117 const nodesB = new Set<string>(); 118 for await (const cid of iterNodeCids(walkerB)) { 119 nodesB.add(cid); 120 } 121 122 const created = new Set<string>(); 123 for (const cid of nodesB) { 124 if (!nodesA.has(cid)) { 125 created.add(cid); 126 } 127 } 128 129 const deleted = new Set<string>(); 130 for (const cid of nodesA) { 131 if (!nodesB.has(cid)) { 132 deleted.add(cid); 133 } 134 } 135 136 return [created, deleted]; 137}; 138 139/** 140 * Helper to iterate over all node CIDs in a tree 141 */ 142async function* iterNodeCids(walker: NodeWalker): AsyncGenerator<string> { 143 // Always yield the current node 144 yield (await walker.frame.node.cid()).$link; 145 146 // Recursively iterate through the tree 147 while (!walker.done) { 148 if (walker.subtree !== null) { 149 await walker.down(); 150 yield (await walker.frame.node.cid()).$link; 151 } else { 152 walker.rightOrUp(); 153 } 154 } 155} 156 157// precomputed from `(await MSTNode.empty().cid()).$link` 158const EMPTY_NODE_CID = 'bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm'; 159 160/** 161 * Efficiently computes the difference between two MSTs 162 * @param ns the node store 163 * @param rootA CID of first MST root 164 * @param rootB CID of second MST root 165 * @returns tuple of [created nodes, deleted nodes] 166 */ 167export const mstDiff = async ( 168 ns: NodeStore, 169 rootA: string, 170 rootB: string, 171): Promise<[Set<string>, Set<string>]> => { 172 const created = new Set<string>(); // nodes in B but not in A 173 const deleted = new Set<string>(); // nodes in A but not in B 174 175 const walkerA = await NodeWalker.create(ns, rootA); 176 const walkerB = await NodeWalker.create(ns, rootB); 177 178 await mstDiffRecursive(created, deleted, walkerA, walkerB); 179 180 // Remove false positives (nodes that appeared in both sets) 181 const middle = new Set<string>(); 182 for (const cid of created) { 183 if (deleted.has(cid)) { 184 middle.add(cid); 185 } 186 } 187 188 for (const cid of middle) { 189 created.delete(cid); 190 deleted.delete(cid); 191 } 192 193 // Special case: if one of the root nodes was empty 194 if (rootA === EMPTY_NODE_CID && rootB !== EMPTY_NODE_CID) { 195 deleted.add(EMPTY_NODE_CID); 196 } 197 if (rootB === EMPTY_NODE_CID && rootA !== EMPTY_NODE_CID) { 198 created.add(EMPTY_NODE_CID); 199 } 200 201 return [created, deleted]; 202}; 203 204/** 205 * Recursive helper for mstDiff 206 * Theory: most trees that get compared will have lots of shared blocks (which we can skip over) 207 * Completely different trees will inevitably have to visit every node. 208 */ 209const mstDiffRecursive = async ( 210 created: Set<string>, 211 deleted: Set<string>, 212 a: NodeWalker, 213 b: NodeWalker, 214): Promise<void> => { 215 // Easiest case: nodes are identical 216 const aNodeCid = (await a.frame.node.cid()).$link; 217 const bNodeCid = (await b.frame.node.cid()).$link; 218 219 if (aNodeCid === bNodeCid) { 220 return; // no difference 221 } 222 223 // Trivial case: a is empty, all of b is new 224 if (a.frame.node.isEmpty) { 225 for await (const cid of iterNodeCids(b)) { 226 created.add(cid); 227 } 228 return; 229 } 230 231 // Likewise: b is empty, all of a is deleted 232 if (b.frame.node.isEmpty) { 233 for await (const cid of iterNodeCids(a)) { 234 deleted.add(cid); 235 } 236 return; 237 } 238 239 // Now we're onto the hard part 240 // NB: these will end up as false-positives if one tree is a subtree of the other 241 created.add(bNodeCid); 242 deleted.add(aNodeCid); 243 244 // General idea: 245 // 1. If one cursor is "behind" the other, catch it up 246 // 2. When we're matched up, skip over identical subtrees (and recursively diff non-identical subtrees) 247 while (true) { 248 // Catch up whichever cursor is behind 249 while (a.rpath !== b.rpath) { 250 // Catch up cursor a if it's behind 251 while (a.rpath < b.rpath && !a.done) { 252 if (a.subtree !== null) { 253 await a.down(); 254 deleted.add((await a.frame.node.cid()).$link); 255 } else { 256 a.rightOrUp(); 257 } 258 } 259 260 // Catch up cursor b likewise 261 while (b.rpath < a.rpath && !b.done) { 262 if (b.subtree !== null) { 263 await b.down(); 264 created.add((await b.frame.node.cid()).$link); 265 } else { 266 b.rightOrUp(); 267 } 268 } 269 } 270 271 // The rpaths now match, but the subtrees below us might not 272 // Recursively diff the subtrees 273 const aSubWalker = await a.createSubtreeWalker(); 274 const bSubWalker = await b.createSubtreeWalker(); 275 await mstDiffRecursive(created, deleted, aSubWalker, bSubWalker); 276 277 // Check if we can still go right 278 const aBottom = a.stack.peekBottom(); 279 const bBottom = b.stack.peekBottom(); 280 281 if ( 282 aBottom !== undefined && 283 bBottom !== undefined && 284 a.rpath === aBottom.rpath && 285 b.rpath === bBottom.rpath 286 ) { 287 break; 288 } 289 290 a.rightOrUp(); 291 b.rightOrUp(); 292 } 293};