A simple tool that lets you scrape Twitter accounts and crosspost them to Bluesky accounts! Comes with a CLI and a web app for managing profiles! Works with images, videos, link embeds, and threads.

v1.0.3: Add random pacing and auto-recovery for Query IDs

jack 748b51c8 c1c7bd05

+64 -49
+2 -1
README.md
··· 12 12 * **Links:** Automatically removes `t.co` tracking links and expands them to their real destinations. 13 13 * **Smart Features:** 14 14 * **Language Detection:** Automatically detects the language of your tweet (e.g., English, Japanese) and tags the Bluesky post correctly. 15 - * **Human-like Pacing:** Adds a small delay between posts to prevent spam detection and rate limits. 15 + * **Human-like Pacing:** Randomly waits (1-4s) between posts to behave more like a real user and avoid spam detection. 16 + * **Auto-Healing:** Automatically rotates internal Twitter Query IDs if they expire, ensuring the tool keeps working 24/7 without manual intervention. 16 17 * **Threads & Replies:** 17 18 * **Perfect Threading:** If you write a thread (reply to yourself) on Twitter, it appears as a threaded conversation on Bluesky. 18 19 * **Clean Feed:** Automatically filters out your replies to *other* people, keeping your Bluesky timeline focused on your original content.
+61 -47
index.js
··· 7 7 const { TwitterClient } = require('@steipete/bird/dist/lib/twitter-client'); 8 8 const franc = require('franc-min'); 9 9 const iso6391 = require('iso-639-1'); 10 + const { exec } = require('child_process'); 10 11 11 12 // Configuration 12 13 const TWITTER_AUTH_TOKEN = process.env.TWITTER_AUTH_TOKEN; ··· 147 148 return "me"; 148 149 } 149 150 151 + function getRandomDelay(min = 1000, max = 4000) { 152 + return Math.floor(Math.random() * (max - min + 1) + min); 153 + } 154 + 155 + function refreshQueryIds() { 156 + return new Promise((resolve) => { 157 + console.log("⚠️ Attempting to refresh Twitter Query IDs via 'bird' CLI..."); 158 + exec('./node_modules/.bin/bird query-ids --fresh', (error, stdout, stderr) => { 159 + if (error) { 160 + console.error(`Error refreshing IDs: ${error.message}`); 161 + console.error(`Stderr: ${stderr}`); 162 + } else { 163 + console.log("✅ Query IDs refreshed successfully."); 164 + } 165 + resolve(); 166 + }); 167 + }); 168 + } 169 + 170 + /** 171 + * Wraps twitter.search with auto-recovery for stale Query IDs 172 + */ 173 + async function safeSearch(query, limit) { 174 + try { 175 + const result = await twitter.search(query, limit); 176 + // Sometimes it returns success: false but no throw 177 + if (!result.success && result.error && 178 + (result.error.toString().includes('GraphQL') || result.error.toString().includes('404'))) { 179 + throw new Error(result.error); 180 + } 181 + return result; 182 + } catch (err) { 183 + console.warn(`Search encountered an error: ${err.message || err}`); 184 + if (err.message && (err.message.includes('GraphQL') || err.message.includes('404') || err.message.includes('Bad Guest Token'))) { 185 + await refreshQueryIds(); 186 + console.log("Retrying search..."); 187 + return await twitter.search(query, limit); 188 + } 189 + return { success: false, error: err }; 190 + } 191 + } 192 + 150 193 // --- Main Processing Logic --- 151 194 152 - async function processTweets(tweets, delayBetweenPosts = 
1000) { 195 + async function processTweets(tweets) { 153 196 // Ensure chronological order 154 197 tweets.reverse(); 155 198 ··· 162 205 // --- Filter Replies (unless we are maintaining a thread) --- 163 206 // If it's a reply, but the parent IS in our DB, we want to post it as a reply. 164 207 // If it's a reply to someone else (or a thread we missed), we skip it based on user preference (only original tweets). 165 - // User asked: "if i do it on twitter... it should continue out a thread". 166 208 167 209 const replyStatusId = tweet.in_reply_to_status_id_str || tweet.in_reply_to_status_id; 168 210 const replyUserId = tweet.in_reply_to_user_id_str || tweet.in_reply_to_user_id; ··· 223 265 } 224 266 225 267 // Aspect Ratio Extraction 226 - // Twitter gives sizes: { large: { w, h, resize }, ... } 227 268 let aspectRatio = undefined; 228 269 if (media.sizes?.large) { 229 270 aspectRatio = { width: media.sizes.large.w, height: media.sizes.large.h }; ··· 233 274 234 275 if (media.type === 'photo') { 235 276 const url = media.media_url_https; 236 - // console.log(`Downloading image: ${url}`); 237 277 try { 238 278 const { buffer, mimeType } = await downloadMedia(url); 239 279 const blob = await uploadToBluesky(buffer, mimeType); ··· 251 291 252 292 if (mp4s.length > 0) { 253 293 const videoUrl = mp4s[0].url; 254 - // console.log(`Downloading video: ${videoUrl}`); 255 294 try { 256 295 const { buffer, mimeType } = await downloadMedia(videoUrl); 257 296 ··· 284 323 if (tweet.is_quote_status && tweet.quoted_status_id_str) { 285 324 const quoteId = tweet.quoted_status_id_str; 286 325 if (processedTweets[quoteId] && !processedTweets[quoteId].migrated) { 287 - // We have the quoted tweet in our history! 
288 326 const ref = processedTweets[quoteId]; 289 327 quoteEmbed = { 290 328 $type: 'app.bsky.embed.record', ··· 293 331 cid: ref.cid 294 332 } 295 333 }; 296 - // Remove the quote URL from text if present (usually at the end) 297 - // Twitter API usually includes the quote URL in entities.urls, so it might be expanded already. 298 - // We should find the url that points to the tweet and remove it. 299 - // A simple heuristic: remove the last url if it looks like a twitter link to the quote. 300 334 } 301 335 } 302 336 ··· 312 346 createdAt: tweet.created_at ? new Date(tweet.created_at).toISOString() : new Date().toISOString() 313 347 }; 314 348 315 - // Attach Embeds (Complex Logic for handling Media + Quote) 349 + // Attach Embeds 316 350 if (videoBlob) { 317 - // Video + Quote is not natively supported in one simple embed field yet in standard way without recordWithMedia? 318 - // Actually recordWithMedia supports Images + Record. Does it support Video + Record? 319 - // Currently app.bsky.embed.video is standalone. 320 - // If we have video AND quote, we might have to drop the quote embed or just link it. 321 - // For now: Prioritize Video. 
322 351 postRecord.embed = { 323 352 $type: 'app.bsky.embed.video', 324 353 video: videoBlob, ··· 331 360 }; 332 361 333 362 if (quoteEmbed) { 334 - // Media + Quote -> app.bsky.embed.recordWithMedia 335 363 postRecord.embed = { 336 364 $type: 'app.bsky.embed.recordWithMedia', 337 365 media: imagesEmbed, ··· 357 385 const response = await agent.post(postRecord); 358 386 // console.log(`Posted: ${tweetId}`); 359 387 360 - // Save with Threading Info 361 388 const newEntry = { 362 389 uri: response.uri, 363 390 cid: response.cid, ··· 367 394 processedTweets[tweetId] = newEntry; 368 395 saveProcessedTweets(); 369 396 370 - // Pacing 371 - if (delayBetweenPosts > 0) { 372 - // Min delay + random jitter (0-500ms) 373 - const sleepTime = delayBetweenPosts + Math.floor(Math.random() * 500); 374 - // console.log(`Sleeping ${sleepTime}ms...`); 375 - await new Promise(r => setTimeout(r, sleepTime)); 376 - } 397 + // Random Pacing (1s - 4s) 398 + const sleepTime = getRandomDelay(1000, 4000); 399 + // console.log(`Sleeping ${sleepTime}ms...`); 400 + await new Promise(r => setTimeout(r, sleepTime)); 377 401 378 402 } catch (err) { 379 403 console.error(`Failed to post ${tweetId}:`, err); ··· 387 411 try { 388 412 const username = await getUsername(); 389 413 390 - // We still filter replies at source to save API calls, 391 - // but our processTweets logic now handles "threading" if we accidentally fetch a reply 392 - // (or if we remove the filter later). 393 - // Current requirement: "filter replies" but "continue thread". 394 - // If we filter replies in search, we WON'T see our own replies to thread them. 395 - // So we MUST remove -filter:replies from the search if we want to support threading. 396 - // BUT user said "it's also posting all my replies which i don't want... it should only crosspost original Tweets". 397 - // AND "if i do it on twitter... it should continue out a thread". 
398 - 399 - // Solution: Fetch EVERYTHING (no -filter:replies), but in `processTweets`, 400 - // ONLY post if it is NOT a reply OR if it is a reply to a KNOWN parent in `processedTweets`. 401 - 402 - const query = `from:${username}`; // Removed -filter:replies to allow threading checks 403 - const result = await twitter.search(query, 30); // Fetch a few more to be safe 414 + // Use safeSearch with auto-refresh for IDs 415 + const query = `from:${username}`; 416 + const result = await safeSearch(query, 30); 404 417 405 418 if (!result.success) { 406 419 console.error("Failed to fetch tweets:", result.error); ··· 410 423 const tweets = result.tweets || []; 411 424 if (tweets.length === 0) return; 412 425 413 - await processTweets(tweets, 1000); // 1s delay for live checks 426 + await processTweets(tweets); 414 427 415 428 } catch (err) { 416 429 console.error("Error in checkAndPost:", err); ··· 429 442 const seenIds = new Set(); 430 443 431 444 while (keepGoing) { 432 - // We fetch everything (including replies) so we can thread them if valid 433 - let query = `from:${username}`; 445 + let query = `from:${username}`; 434 446 if (maxId) { 435 447 query += ` max_id:${maxId}`; 436 448 } 437 449 438 450 console.log(`Fetching batch... (Collected: ${allFoundTweets.length})`); 439 451 440 - const result = await twitter.search(query, count); 452 + const result = await safeSearch(query, count); 441 453 442 454 if (!result.success) { 443 455 console.error("Fetch failed:", result.error); ··· 472 484 console.log(`Fetch complete. 
Found ${allFoundTweets.length} new tweets to import.`); 473 485 474 486 if (allFoundTweets.length > 0) { 475 - console.log("Starting processing (Oldest -> Newest) with pacing..."); 476 - // 1 seconds delay average for human-like backfill 477 - await processTweets(allFoundTweets, 1000); 487 + console.log("Starting processing (Oldest -> Newest) with random pacing..."); 488 + await processTweets(allFoundTweets); 478 489 console.log("History import complete."); 479 490 } else { 480 491 console.log("Nothing new to import."); ··· 501 512 process.exit(0); 502 513 } 503 514 515 + // Refresh IDs on startup just to be safe/fresh 516 + // await refreshQueryIds(); 517 + 504 518 await checkAndPost(); 505 519 506 520 console.log(`Scheduling check every ${CHECK_INTERVAL_MINUTES} minutes.`); 507 521 cron.schedule(`*/${CHECK_INTERVAL_MINUTES} * * * *`, checkAndPost); 508 - })(); 522 + })();
+1 -1
package.json
··· 1 1 { 2 2 "name": "tweets-2-bsky", 3 - "version": "1.0.2", 3 + "version": "1.0.3", 4 4 "description": "A powerful tool to crosspost Tweets to Bluesky, supporting threads, videos, and high-quality images.", 5 5 "main": "index.js", 6 6 "scripts": {