A simple tool which lets you scrape twitter accounts and crosspost them to bluesky accounts! Comes with a CLI and a webapp for managing profiles! Works with images/videos/link embeds/threads.
at main 407 lines 13 kB view raw
1import fs from 'node:fs'; 2import path from 'node:path'; 3import { fileURLToPath } from 'node:url'; 4import Database from 'better-sqlite3'; 5 6const __filename = fileURLToPath(import.meta.url); 7const __dirname = path.dirname(__filename); 8 9const DB_DIR = path.join(__dirname, '..', 'data'); 10if (!fs.existsSync(DB_DIR)) { 11 fs.mkdirSync(DB_DIR); 12} 13 14const db = new Database(path.join(DB_DIR, 'database.sqlite')); 15 16// Enable WAL mode for better concurrency 17db.pragma('journal_mode = WAL'); 18 19// --- Migration Support --- 20const tableInfo = db.prepare('PRAGMA table_info(processed_tweets)').all() as any[]; 21 22if (tableInfo.length > 0) { 23 let schemaChanged = false; 24 const hasBskyIdentifier = tableInfo.some((col) => col.name === 'bsky_identifier'); 25 const hasTweetText = tableInfo.some((col) => col.name === 'tweet_text'); 26 const hasTailUri = tableInfo.some((col) => col.name === 'bsky_tail_uri'); 27 28 if (!hasBskyIdentifier || !hasTweetText || !hasTailUri) { 29 console.log('🔄 Upgrading database schema...'); 30 31 // SQLite doesn't support easy PK changes, so we recreate the table if identifier is missing 32 // Or if we just need to add a column, we can do ALTER TABLE if it's not the PK. 33 // However, since we might need to do both or one, let's just do the full migration pattern 34 // to be safe and consistent. 35 36 db.transaction(() => { 37 // 1. Rename existing table 38 db.exec(`ALTER TABLE processed_tweets RENAME TO processed_tweets_old;`); 39 40 // 2. Create new table with all columns 41 db.exec(` 42 CREATE TABLE processed_tweets ( 43 twitter_id TEXT NOT NULL, 44 twitter_username TEXT NOT NULL, 45 bsky_identifier TEXT NOT NULL, 46 tweet_text TEXT, 47 bsky_uri TEXT, 48 bsky_cid TEXT, 49 bsky_root_uri TEXT, 50 bsky_root_cid TEXT, 51 bsky_tail_uri TEXT, 52 bsky_tail_cid TEXT, 53 status TEXT NOT NULL, 54 created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 55 PRIMARY KEY (twitter_id, bsky_identifier) 56 ); 57 `); 58 59 // 3. Migrate data 60 // Handle the case where the old table might not have had bsky_identifier 61 const oldColumns = tableInfo.map((c) => c.name); 62 63 // Construct the SELECT part based on available old columns 64 // If old table didn't have bsky_identifier, we default to 'unknown' 65 const identifierSelect = oldColumns.includes('bsky_identifier') ? 'bsky_identifier' : "'unknown'"; 66 67 // If old table didn't have tweet_text, we default to NULL 68 const textSelect = oldColumns.includes('tweet_text') ? 'tweet_text' : "NULL"; 69 70 const tailUriSelect = oldColumns.includes('bsky_tail_uri') ? 'bsky_tail_uri' : "NULL"; 71 const tailCidSelect = oldColumns.includes('bsky_tail_cid') ? 'bsky_tail_cid' : "NULL"; 72 73 db.exec(` 74 INSERT INTO processed_tweets ( 75 twitter_id, 76 twitter_username, 77 bsky_identifier, 78 tweet_text, 79 bsky_uri, 80 bsky_cid, 81 bsky_root_uri, 82 bsky_root_cid, 83 bsky_tail_uri, 84 bsky_tail_cid, 85 status, 86 created_at 87 ) 88 SELECT 89 twitter_id, 90 twitter_username, 91 ${identifierSelect}, 92 ${textSelect}, 93 bsky_uri, 94 bsky_cid, 95 bsky_root_uri, 96 bsky_root_cid, 97 ${tailUriSelect}, 98 ${tailCidSelect}, 99 status, 100 created_at 101 FROM processed_tweets_old; 102 `); 103 104 // 4. Drop old table 105 db.exec(`DROP TABLE processed_tweets_old;`); 106 })(); 107 console.log('✅ Database upgraded successfully.'); 108 } 109} else { 110 // Initialize fresh schema 111 db.exec(` 112 CREATE TABLE IF NOT EXISTS processed_tweets ( 113 twitter_id TEXT NOT NULL, 114 twitter_username TEXT NOT NULL, 115 bsky_identifier TEXT NOT NULL, 116 tweet_text TEXT, 117 bsky_uri TEXT, 118 bsky_cid TEXT, 119 bsky_root_uri TEXT, 120 bsky_root_cid TEXT, 121 bsky_tail_uri TEXT, 122 bsky_tail_cid TEXT, 123 status TEXT NOT NULL, -- 'migrated', 'skipped', 'failed' 124 created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 125 PRIMARY KEY (twitter_id, bsky_identifier) 126 ); 127 `); 128} 129 130db.exec(` 131 CREATE INDEX IF NOT EXISTS idx_twitter_username ON processed_tweets(twitter_username); 132 CREATE INDEX IF NOT EXISTS idx_bsky_identifier ON processed_tweets(bsky_identifier); 133`); 134 135export interface ProcessedTweet { 136 twitter_id: string; 137 twitter_username: string; 138 bsky_identifier: string; 139 tweet_text?: string; 140 bsky_uri?: string; 141 bsky_cid?: string; 142 bsky_root_uri?: string; 143 bsky_root_cid?: string; 144 bsky_tail_uri?: string; 145 bsky_tail_cid?: string; 146 status: 'migrated' | 'skipped' | 'failed'; 147 created_at?: string; 148} 149 150export interface ProcessedTweetSearchResult extends ProcessedTweet { 151 score: number; 152} 153 154function normalizeSearchValue(value: string): string { 155 return value 156 .toLowerCase() 157 .replace(/[^a-z0-9@#._\-\s]+/g, ' ') 158 .replace(/\s+/g, ' ') 159 .trim(); 160} 161 162function tokenizeSearchValue(value: string): string[] { 163 if (!value) { 164 return []; 165 } 166 return value.split(' ').filter((token) => token.length > 0); 167} 168 169function orderedSubsequenceScore(query: string, candidate: string): number { 170 if (!query || !candidate) { 171 return 0; 172 } 173 174 let matched = 0; 175 let searchIndex = 0; 176 for (const char of query) { 177 const foundIndex = candidate.indexOf(char, searchIndex); 178 if (foundIndex === -1) { 179 continue; 180 } 181 matched += 1; 182 searchIndex = foundIndex + 1; 183 } 184 185 return matched / query.length; 186} 187 188function buildBigrams(value: string): Set<string> { 189 const result = new Set<string>(); 190 if (value.length < 2) { 191 if (value.length === 1) { 192 result.add(value); 193 } 194 return result; 195 } 196 197 for (let i = 0; i < value.length - 1; i += 1) { 198 result.add(value.slice(i, i + 2)); 199 } 200 201 return result; 202} 203 204function diceCoefficient(a: string, b: string): number { 205 const aBigrams = buildBigrams(a); 206 const bBigrams = buildBigrams(b); 207 if (aBigrams.size === 0 || bBigrams.size === 0) { 208 return 0; 209 } 210 211 let overlap = 0; 212 for (const gram of aBigrams) { 213 if (bBigrams.has(gram)) { 214 overlap += 1; 215 } 216 } 217 218 return (2 * overlap) / (aBigrams.size + bBigrams.size); 219} 220 221function scoreCandidateField(query: string, tokens: string[], candidateValue?: string): number { 222 const candidate = normalizeSearchValue(candidateValue || ''); 223 if (!query || !candidate) { 224 return 0; 225 } 226 227 let score = 0; 228 if (candidate === query) { 229 score += 170; 230 } else if (candidate.startsWith(query)) { 231 score += 140; 232 } else if (candidate.includes(query)) { 233 score += 112; 234 } 235 236 let matchedTokens = 0; 237 for (const token of tokens) { 238 if (candidate.includes(token)) { 239 matchedTokens += 1; 240 score += token.length >= 4 ? 18 : 12; 241 } 242 } 243 244 if (tokens.length > 0) { 245 score += (matchedTokens / tokens.length) * 48; 246 } 247 248 score += orderedSubsequenceScore(query, candidate) * 46; 249 score += diceCoefficient(query, candidate) * 55; 250 251 return score; 252} 253 254function scoreProcessedTweet(tweet: ProcessedTweet, query: string, tokens: string[]): number { 255 const usernameScore = scoreCandidateField(query, tokens, tweet.twitter_username) * 1.25; 256 const identifierScore = scoreCandidateField(query, tokens, tweet.bsky_identifier) * 1.18; 257 const textScore = scoreCandidateField(query, tokens, tweet.tweet_text) * 0.98; 258 const idScore = scoreCandidateField(query, tokens, tweet.twitter_id) * 0.72; 259 260 const maxScore = Math.max(usernameScore, identifierScore, textScore, idScore); 261 const blendedScore = maxScore + (usernameScore + identifierScore + textScore + idScore - maxScore) * 0.22; 262 263 const recencyBoost = (() => { 264 if (!tweet.created_at) return 0; 265 const timestamp = Date.parse(tweet.created_at); 266 if (!Number.isFinite(timestamp)) return 0; 267 const ageDays = (Date.now() - timestamp) / (24 * 60 * 60 * 1000); 268 return Math.max(0, 7 - ageDays); 269 })(); 270 271 return blendedScore + recencyBoost; 272} 273 274export const dbService = { 275 getTweet(twitterId: string, bskyIdentifier: string): ProcessedTweet | null { 276 const stmt = db.prepare('SELECT * FROM processed_tweets WHERE twitter_id = ? AND bsky_identifier = ?'); 277 const row = stmt.get(twitterId, bskyIdentifier) as any; 278 if (!row) return null; 279 return { 280 twitter_id: row.twitter_id, 281 twitter_username: row.twitter_username, 282 bsky_identifier: row.bsky_identifier, 283 tweet_text: row.tweet_text, 284 bsky_uri: row.bsky_uri, 285 bsky_cid: row.bsky_cid, 286 bsky_root_uri: row.bsky_root_uri, 287 bsky_root_cid: row.bsky_root_cid, 288 bsky_tail_uri: row.bsky_tail_uri, 289 bsky_tail_cid: row.bsky_tail_cid, 290 status: row.status, 291 created_at: row.created_at 292 }; 293 }, 294 295 saveTweet(tweet: ProcessedTweet) { 296 const stmt = db.prepare(` 297 INSERT OR REPLACE INTO processed_tweets 298 (twitter_id, twitter_username, bsky_identifier, tweet_text, bsky_uri, bsky_cid, bsky_root_uri, bsky_root_cid, bsky_tail_uri, bsky_tail_cid, status) 299 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 300 `); 301 stmt.run( 302 tweet.twitter_id, 303 tweet.twitter_username, 304 tweet.bsky_identifier, 305 tweet.tweet_text || null, 306 tweet.bsky_uri || null, 307 tweet.bsky_cid || null, 308 tweet.bsky_root_uri || null, 309 tweet.bsky_root_cid || null, 310 tweet.bsky_tail_uri || null, 311 tweet.bsky_tail_cid || null, 312 tweet.status, 313 ); 314 }, 315 316 getTweetsByBskyIdentifier(bskyIdentifier: string): Record<string, any> { 317 const stmt = db.prepare('SELECT * FROM processed_tweets WHERE bsky_identifier = ?'); 318 const rows = stmt.all(bskyIdentifier.toLowerCase()) as any[]; 319 const map: Record<string, any> = {}; 320 for (const row of rows) { 321 map[row.twitter_id] = { 322 uri: row.bsky_uri, 323 cid: row.bsky_cid, 324 root: row.bsky_root_uri ? { uri: row.bsky_root_uri, cid: row.bsky_root_cid } : undefined, 325 tail: (row.bsky_tail_uri && row.bsky_tail_cid) ? { uri: row.bsky_tail_uri, cid: row.bsky_tail_cid } : undefined, 326 migrated: row.status === 'migrated', 327 skipped: row.status === 'skipped', 328 }; 329 } 330 return map; 331 }, 332 333 getTweetsByUsername(username: string): Record<string, any> { 334 const stmt = db.prepare('SELECT * FROM processed_tweets WHERE twitter_username = ?'); 335 const rows = stmt.all(username.toLowerCase()) as any[]; 336 const map: Record<string, any> = {}; 337 for (const row of rows) { 338 map[row.twitter_id] = { 339 uri: row.bsky_uri, 340 cid: row.bsky_cid, 341 root: row.bsky_root_uri ? { uri: row.bsky_root_uri, cid: row.bsky_root_cid } : undefined, 342 tail: (row.bsky_tail_uri && row.bsky_tail_cid) ? { uri: row.bsky_tail_uri, cid: row.bsky_tail_cid } : undefined, 343 migrated: row.status === 'migrated', 344 skipped: row.status === 'skipped' 345 }; 346 } 347 return map; 348 }, 349 350 getRecentProcessedTweets(limit = 50): ProcessedTweet[] { 351 const stmt = db.prepare('SELECT * FROM processed_tweets ORDER BY datetime(created_at) DESC, rowid DESC LIMIT ?'); 352 return stmt.all(limit) as ProcessedTweet[]; 353 }, 354 355 searchMigratedTweets(query: string, limit = 60, scanLimit = 3000): ProcessedTweetSearchResult[] { 356 const normalizedQuery = normalizeSearchValue(query || ''); 357 if (!normalizedQuery) { 358 return []; 359 } 360 361 const safeLimit = Number.isFinite(limit) ? Math.max(1, Math.min(limit, 200)) : 60; 362 const safeScanLimit = Number.isFinite(scanLimit) ? Math.max(safeLimit, Math.min(scanLimit, 8000)) : 3000; 363 const tokens = tokenizeSearchValue(normalizedQuery); 364 365 const stmt = db.prepare( 366 'SELECT * FROM processed_tweets WHERE status = "migrated" ORDER BY datetime(created_at) DESC, rowid DESC LIMIT ?', 367 ); 368 const rows = stmt.all(safeScanLimit) as ProcessedTweet[]; 369 370 return rows 371 .map((row) => ({ 372 ...row, 373 score: scoreProcessedTweet(row, normalizedQuery, tokens), 374 })) 375 .filter((row) => row.score >= 22) 376 .sort((a, b) => { 377 if (b.score !== a.score) { 378 return b.score - a.score; 379 } 380 const aTime = a.created_at ? Date.parse(a.created_at) : 0; 381 const bTime = b.created_at ? Date.parse(b.created_at) : 0; 382 return (Number.isFinite(bTime) ? bTime : 0) - (Number.isFinite(aTime) ? aTime : 0); 383 }) 384 .slice(0, safeLimit); 385 }, 386 387 deleteTweetsByUsername(username: string) { 388 const stmt = db.prepare('DELETE FROM processed_tweets WHERE twitter_username = ?'); 389 stmt.run(username.toLowerCase()); 390 }, 391 392 deleteTweetsByBskyIdentifier(bskyIdentifier: string) { 393 const stmt = db.prepare('DELETE FROM processed_tweets WHERE bsky_identifier = ?'); 394 stmt.run(bskyIdentifier.toLowerCase()); 395 }, 396 397 repairUnknownIdentifiers(twitterUsername: string, bskyIdentifier: string) { 398 const stmt = db.prepare( 399 'UPDATE processed_tweets SET bsky_identifier = ? WHERE bsky_identifier = "unknown" AND twitter_username = ?', 400 ); 401 stmt.run(bskyIdentifier.toLowerCase(), twitterUsername.toLowerCase()); 402 }, 403 404 clearAll() { 405 db.prepare('DELETE FROM processed_tweets').run(); 406 }, 407};