tangled
alpha
login
or
join now
indexx.dev
/
tweets2bsky
forked from
j4ck.xyz/tweets2bsky
0
fork
atom
A simple tool that lets you scrape Twitter accounts and crosspost them to Bluesky accounts. Comes with a CLI and a web app for managing profiles. Works with images, videos, link embeds, and threads.
0
fork
atom
overview
issues
pulls
pipelines
feat: include card media when available
jack
1 month ago
a072cde6
9a1e7580
+880
-507
1 changed file
expand all
collapse all
unified
split
src
index.ts
+880
-507
src/index.ts
···
2
2
import fs from 'node:fs';
3
3
import path from 'node:path';
4
4
import { fileURLToPath } from 'node:url';
5
5
-
import { BskyAgent, RichText } from '@atproto/api';
5
5
+
import { type BskyAgent, RichText } from '@atproto/api';
6
6
import type { BlobRef } from '@atproto/api';
7
7
import { Scraper } from '@the-convocation/twitter-scraper';
8
8
import type { Tweet as ScraperTweet } from '@the-convocation/twitter-scraper';
9
9
import axios from 'axios';
10
10
+
import * as cheerio from 'cheerio';
10
11
import { Command } from 'commander';
11
12
import * as francModule from 'franc-min';
12
13
import iso6391 from 'iso-639-1';
13
14
import puppeteer from 'puppeteer-core';
14
14
-
import * as cheerio from 'cheerio';
15
15
import sharp from 'sharp';
16
16
import { generateAltText } from './ai-manager.js';
17
17
···
21
21
const __filename = fileURLToPath(import.meta.url);
22
22
const __dirname = path.dirname(__filename);
23
23
24
24
-
// ============================================================================
24
24
+
// ============================================================================
25
25
// Type Definitions
26
26
-
// ============================================================================
26
26
+
// ============================================================================
27
27
28
28
interface ProcessedTweetEntry {
29
29
uri?: string;
···
44
44
expanded_url?: string;
45
45
}
46
46
47
47
+
/**
 * Image payload carried by a card binding (see `CardBindingValue.image_value`).
 * Every field is optional because the data comes from the raw tweet payload.
 */
interface CardImageValue {
  /** Address of the image; the card helpers read this to collect card images. */
  url?: string;
  width?: number;
  height?: number;
  /** Alternative text shipped with the image, when present. */
  alt?: string;
}
53
53
+
54
54
+
/**
 * A single card binding value. The card helpers in this file read
 * `string_value` (links and titles) and `image_value` (images).
 */
interface CardBindingValue {
  // NOTE(review): presumably names which of the value fields is populated — confirm against the raw payload.
  type?: string;
  /** Textual payload; may hold either a URL or plain text. */
  string_value?: string;
  /** Image payload, when the binding describes an image. */
  image_value?: CardImageValue;
}
59
59
+
60
60
+
/**
 * One element of the array form of card bindings: a key paired with its value.
 * Entries missing either part are ignored when the bindings are normalized.
 */
interface CardBindingEntry {
  key?: string;
  value?: CardBindingValue;
}
64
64
+
65
65
+
/** Card bindings in either accepted shape: a list of key/value entries or a record keyed by binding name. */
type CardBindingValues = CardBindingEntry[] | Record<string, CardBindingValue>;
66
66
+
67
67
+
/**
 * Card attached to a tweet, as taken from the raw tweet payload.
 */
interface TweetCard {
  /** Card name; matched (lower-cased) against promo keywords when detecting sponsored cards. */
  name?: string;
  /** Bindings from which card images, links, titles and alt text are extracted. */
  binding_values?: CardBindingValues;
  url?: string;
}
72
72
+
47
73
interface MediaSize {
48
74
w: number;
49
75
h: number;
···
78
104
sizes?: MediaSizes;
79
105
original_info?: OriginalInfo;
80
106
video_info?: VideoInfo;
107
107
+
source?: 'tweet' | 'card';
81
108
}
82
109
83
110
interface TweetEntities {
···
105
132
screen_name?: string;
106
133
id_str?: string;
107
134
};
135
135
+
card?: TweetCard | null;
136
136
+
permanentUrl?: string;
108
137
}
109
138
110
139
interface AspectRatio {
···
120
149
121
150
import { dbService } from './db.js';
122
151
123
123
-
// ============================================================================
152
152
+
// ============================================================================
124
153
// State Management
125
125
-
// ============================================================================
154
154
+
// ============================================================================
126
155
127
156
const PROCESSED_DIR = path.join(__dirname, '..', 'processed');
128
157
129
158
async function migrateJsonToSqlite() {
130
159
if (!fs.existsSync(PROCESSED_DIR)) return;
131
131
-
132
132
-
const files = fs.readdirSync(PROCESSED_DIR).filter(f => f.endsWith('.json'));
160
160
+
161
161
+
const files = fs.readdirSync(PROCESSED_DIR).filter((f) => f.endsWith('.json'));
133
162
if (files.length === 0) return;
134
163
135
164
console.log(`📦 Found ${files.length} legacy cache files. Migrating to SQLite...`);
136
165
const config = getConfig();
137
137
-
166
166
+
138
167
for (const file of files) {
139
168
const username = file.replace('.json', '').toLowerCase();
140
169
// Try to find a matching bskyIdentifier from config
141
141
-
const mapping = config.mappings.find(m => m.twitterUsernames.map(u => u.toLowerCase()).includes(username));
170
170
+
const mapping = config.mappings.find((m) => m.twitterUsernames.map((u) => u.toLowerCase()).includes(username));
142
171
const bskyIdentifier = mapping?.bskyIdentifier || 'unknown';
143
172
144
173
try {
145
174
const filePath = path.join(PROCESSED_DIR, file);
146
175
const data = JSON.parse(fs.readFileSync(filePath, 'utf8')) as ProcessedTweetsMap;
147
147
-
176
176
+
148
177
for (const [twitterId, entry] of Object.entries(data)) {
149
178
dbService.saveTweet({
150
179
twitter_id: twitterId,
···
154
183
bsky_cid: entry.cid,
155
184
bsky_root_uri: entry.root?.uri,
156
185
bsky_root_cid: entry.root?.cid,
157
157
-
status: entry.migrated ? 'migrated' : (entry.skipped ? 'skipped' : 'failed')
186
186
+
status: entry.migrated ? 'migrated' : entry.skipped ? 'skipped' : 'failed',
158
187
});
159
188
}
160
189
// Move file to backup
···
172
201
dbService.repairUnknownIdentifiers(username, mapping.bskyIdentifier);
173
202
}
174
203
}
175
175
-
204
204
+
176
205
console.log('✅ Migration complete.');
177
206
}
178
207
···
180
209
return dbService.getTweetsByBskyIdentifier(bskyIdentifier);
181
210
}
182
211
183
183
-
function saveProcessedTweet(twitterUsername: string, bskyIdentifier: string, twitterId: string, entry: ProcessedTweetEntry): void {
212
212
+
function saveProcessedTweet(
213
213
+
twitterUsername: string,
214
214
+
bskyIdentifier: string,
215
215
+
twitterId: string,
216
216
+
entry: ProcessedTweetEntry,
217
217
+
): void {
184
218
dbService.saveTweet({
185
219
twitter_id: twitterId,
186
220
twitter_username: twitterUsername.toLowerCase(),
···
192
226
bsky_root_cid: entry.root?.cid,
193
227
bsky_tail_uri: entry.tail?.uri,
194
228
bsky_tail_cid: entry.tail?.cid,
195
195
-
status: entry.migrated || (entry.uri && entry.cid) ? 'migrated' : (entry.skipped ? 'skipped' : 'failed')
229
229
+
status: entry.migrated || (entry.uri && entry.cid) ? 'migrated' : entry.skipped ? 'skipped' : 'failed',
196
230
});
197
231
}
198
232
199
199
-
// ============================================================================
233
233
+
// ============================================================================
200
234
// Custom Twitter Client
201
201
-
// ============================================================================
235
235
+
// ============================================================================
202
236
203
237
let scraper: Scraper | null = null;
204
238
let currentTwitterCookies = { authToken: '', ct0: '' };
···
216
250
}
217
251
218
252
if (!authToken || !ct0) return null;
219
219
-
253
253
+
220
254
// Re-initialize if config changed, not yet initialized, or forced reset
221
221
-
if (
222
222
-
!scraper ||
223
223
-
forceReset ||
224
224
-
currentTwitterCookies.authToken !== authToken ||
225
225
-
currentTwitterCookies.ct0 !== ct0
226
226
-
) {
255
255
+
if (!scraper || forceReset || currentTwitterCookies.authToken !== authToken || currentTwitterCookies.ct0 !== ct0) {
227
256
console.log(`🔄 Initializing Twitter scraper with ${useBackupCredentials ? 'BACKUP' : 'PRIMARY'} credentials...`);
228
257
scraper = new Scraper();
229
229
-
await scraper.setCookies([
230
230
-
`auth_token=${authToken}`,
231
231
-
`ct0=${ct0}`
232
232
-
]);
258
258
+
await scraper.setCookies([`auth_token=${authToken}`, `ct0=${ct0}`]);
233
259
234
234
-
currentTwitterCookies = {
235
235
-
authToken: authToken,
236
236
-
ct0: ct0
260
260
+
currentTwitterCookies = {
261
261
+
authToken: authToken,
262
262
+
ct0: ct0,
237
263
};
238
264
}
239
265
return scraper;
···
247
273
await getTwitterScraper(true);
248
274
return true;
249
275
}
250
250
-
console.log("⚠️ No backup credentials available to switch to.");
276
276
+
console.log('⚠️ No backup credentials available to switch to.');
251
277
return false;
252
278
}
253
279
254
280
/**
 * Converts a tweet returned by the scraper into the local `Tweet` shape.
 *
 * When the scraper exposes the raw payload (`__raw_UNSTABLE`), fields are
 * copied from it, including the card. Otherwise a reduced tweet is built from
 * the scraper's parsed fields (text, urls, photos, timestamp).
 *
 * @param scraperTweet Tweet as produced by the scraper library.
 * @returns The tweet mapped to the local representation.
 */
function mapScraperTweetToLocalTweet(scraperTweet: ScraperTweet): Tweet {
  const raw = scraperTweet.__raw_UNSTABLE;

  if (raw) {
    return {
      id: raw.id_str,
      id_str: raw.id_str,
      text: raw.full_text,
      full_text: raw.full_text,
      created_at: raw.created_at,
      isRetweet: scraperTweet.isRetweet,
      // biome-ignore lint/suspicious/noExplicitAny: raw types match compatible structure
      entities: raw.entities as any,
      // biome-ignore lint/suspicious/noExplicitAny: raw types match compatible structure
      extended_entities: raw.extended_entities as any,
      quoted_status_id_str: raw.quoted_status_id_str,
      retweeted_status_id_str: raw.retweeted_status_id_str,
      // A tweet counts as a quote exactly when the raw payload carries a quoted status id.
      is_quote_status: !!raw.quoted_status_id_str,
      in_reply_to_status_id_str: raw.in_reply_to_status_id_str,
      // biome-ignore lint/suspicious/noExplicitAny: missing in LegacyTweetRaw type
      in_reply_to_user_id_str: (raw as any).in_reply_to_user_id_str,
      // biome-ignore lint/suspicious/noExplicitAny: card comes from raw tweet
      card: (raw as any).card,
      permanentUrl: scraperTweet.permanentUrl,
      user: {
        screen_name: scraperTweet.username,
        id_str: scraperTweet.userId,
      },
    };
  }

  // Fallback if raw data is missing (shouldn't happen for timeline tweets usually):
  // construct minimal entities from the scraper's parsed data.
  const urlEntities = scraperTweet.urls.map((url: string) => ({ url, expanded_url: url }));
  const photoEntities = scraperTweet.photos.map((p: any) => ({
    url: p.url,
    expanded_url: p.url,
    media_url_https: p.url,
    type: 'photo',
    ext_alt_text: p.alt_text,
  }));

  return {
    id: scraperTweet.id,
    id_str: scraperTweet.id,
    text: scraperTweet.text,
    full_text: scraperTweet.text,
    isRetweet: scraperTweet.isRetweet,
    entities: {
      urls: urlEntities,
      media: photoEntities,
    },
    created_at: scraperTweet.timeParsed?.toUTCString(),
    permanentUrl: scraperTweet.permanentUrl,
  };
}
302
332
303
303
-
// ============================================================================
333
333
+
// ============================================================================
304
334
// Helper Functions
305
305
-
// ============================================================================
335
335
+
// ============================================================================
336
336
+
337
337
+
/**
 * Turns card bindings into a key → value lookup.
 *
 * - No bindings: returns a new empty object.
 * - Record form: returned unchanged (same reference).
 * - Array form: entries are folded into a record; entries without a key or
 *   without a value are dropped, and a later entry overrides an earlier one
 *   that has the same key.
 */
function normalizeCardBindings(bindingValues?: CardBindingValues): Record<string, CardBindingValue> {
  if (!bindingValues) return {};

  if (!Array.isArray(bindingValues)) {
    return bindingValues as Record<string, CardBindingValue>;
  }

  const lookup: Record<string, CardBindingValue> = {};
  for (const entry of bindingValues) {
    if (entry?.key && entry.value) lookup[entry.key] = entry.value;
  }
  return lookup;
}
350
350
+
351
351
+
/**
 * Type guard: true when `value` is a non-empty string that begins with
 * `http://` or `https://` (scheme matched case-insensitively).
 */
function isLikelyUrl(value?: string): value is string {
  return Boolean(value) && /^https?:\/\//i.test(value as string);
}
355
355
+
356
356
+
/**
 * Picks an image URL out of the card bindings.
 *
 * The keys in `preferredKeys` are tried in order; the first one whose binding
 * has an image URL wins. If none does, the first binding (in record order)
 * that has an image URL is used.
 *
 * @returns The image URL, or `undefined` when no binding carries one.
 */
function extractCardImageUrl(bindingValues: CardBindingValues, preferredKeys: string[]): string | undefined {
  const bindings = normalizeCardBindings(bindingValues);
  const candidates = [
    ...preferredKeys.map((key) => bindings[key]?.image_value?.url),
    ...Object.values(bindings).map((binding) => binding?.image_value?.url),
  ];
  // First truthy candidate; `|| undefined` keeps the "nothing found" result as undefined.
  return candidates.find((url) => Boolean(url)) || undefined;
}
366
366
+
367
367
+
/**
 * Picks a link out of the card bindings.
 *
 * A binding qualifies when its `string_value` looks like an http(s) URL.
 * Preferred keys are checked first, in order; otherwise the first qualifying
 * binding in record order is used.
 *
 * @returns The link, or `undefined` when no binding holds a URL.
 */
function extractCardLink(bindingValues: CardBindingValues, preferredKeys: string[]): string | undefined {
  const bindings = normalizeCardBindings(bindingValues);
  const candidates = [
    ...preferredKeys.map((key) => bindings[key]?.string_value),
    ...Object.values(bindings).map((binding) => binding?.string_value),
  ];
  return candidates.find((candidate): candidate is string => isLikelyUrl(candidate));
}
377
377
+
378
378
+
/**
 * Picks a title out of the card bindings.
 *
 * A binding qualifies when its `string_value` is non-empty and does not look
 * like an http(s) URL. Preferred keys are checked first, in order; otherwise
 * the first qualifying binding in record order is used.
 *
 * @returns The title text, or `undefined` when nothing qualifies.
 */
function extractCardTitle(bindingValues: CardBindingValues, preferredKeys: string[]): string | undefined {
  const bindings = normalizeCardBindings(bindingValues);
  const looksLikeTitle = (text?: string): text is string => Boolean(text) && !isLikelyUrl(text);

  for (const key of preferredKeys) {
    const text = bindings[key]?.string_value;
    if (looksLikeTitle(text)) return text;
  }

  for (const binding of Object.values(bindings)) {
    const text = binding?.string_value;
    if (looksLikeTitle(text)) return text;
  }

  return undefined;
}
390
390
+
391
391
+
/**
 * Returns the alt text of the first card binding (in record order) whose image
 * carries a non-empty `alt`, or `undefined` when none does.
 */
function extractCardAlt(bindingValues: CardBindingValues): string | undefined {
  for (const binding of Object.values(normalizeCardBindings(bindingValues))) {
    const altText = binding?.image_value?.alt;
    if (altText) return altText;
  }
  return undefined;
}
396
396
+
397
397
+
/**
 * Appends a labelled link (`<label>: <link>`) to `text`, separated by a blank line.
 *
 * The text is returned untouched when no link is given or when it already
 * contains the link. Otherwise the combined string is trimmed at both ends.
 *
 * @param text Post text to extend.
 * @param link Link to append.
 * @param label Prefix placed before the link; defaults to `Sponsored`.
 */
function appendCallToAction(text: string, link?: string, label = 'Sponsored') {
  const nothingToAdd = !link || text.includes(link);
  return nothingToAdd ? text : `${text}\n\n${label}: ${link}`.trim();
}
402
402
+
403
403
+
/**
 * Collects everything usable from a tweet's card: image URLs, a link, a title
 * and alt text.
 *
 * `imageUrls` starts with the image found under the preferred image keys (if
 * any), followed by every other binding image in record order, without
 * duplicates. A tweet without card bindings yields `{ imageUrls: [] }`.
 */
function detectCardMedia(tweet: Tweet): { imageUrls: string[]; link?: string; title?: string; alt?: string } {
  const bindings = tweet.card?.binding_values;
  if (!bindings) return { imageUrls: [] };

  const preferredImageKeys = [
    'photo_image_full_size',
    'photo_image_full_size_original',
    'thumbnail_image',
    'image',
    'thumbnail',
    'summary_photo_image',
    'player_image',
  ];
  const preferredLinkKeys = ['site', 'destination', 'landing_url', 'cta_link', 'card_url', 'url'];
  const preferredTitleKeys = ['title', 'summary', 'card_title'];

  // A Set keeps insertion order, so the preferred image stays first while duplicates are dropped.
  const uniqueUrls = new Set<string>();
  const primaryImage = extractCardImageUrl(bindings, preferredImageKeys);
  if (primaryImage) uniqueUrls.add(primaryImage);
  for (const binding of Object.values(normalizeCardBindings(bindings))) {
    const url = binding?.image_value?.url;
    if (url) uniqueUrls.add(url);
  }

  return {
    imageUrls: [...uniqueUrls],
    link: extractCardLink(bindings, preferredLinkKeys),
    title: extractCardTitle(bindings, preferredTitleKeys),
    alt: extractCardAlt(bindings),
  };
}
435
435
+
436
436
+
/**
 * Builds photo media entities from a tweet's card images.
 *
 * At most four images are used. Each entity is tagged `source: 'card'` and
 * shares one alt text: the card's alt, else its title, else `Sponsored image`.
 *
 * @returns The entities plus the card link; `{ media: [] }` when the card has no images.
 */
function buildCardMediaEntities(tweet: Tweet): { media: MediaEntity[]; link?: string } {
  const { imageUrls, link, title, alt } = detectCardMedia(tweet);
  if (imageUrls.length === 0) return { media: [] };

  const altText = alt || title || 'Sponsored image';
  const media = imageUrls.slice(0, 4).map((url) => ({
    media_url_https: url,
    type: 'photo' as const,
    ext_alt_text: altText,
    source: 'card' as const,
  }));

  return { media, link };
}
449
449
+
450
450
+
/**
 * Makes sure `entities.urls` contains `link`, mutating `entities` in place.
 *
 * Nothing happens when either argument is missing or when an existing entry
 * already has `link` as its `url` or `expanded_url`. Otherwise an entry with
 * both fields set to `link` is appended (creating the list if needed).
 */
function ensureUrlEntity(entities: TweetEntities | undefined, link?: string) {
  if (!link || !entities) return;

  const urls = entities.urls || [];
  for (const existing of urls) {
    if (existing.expanded_url === link || existing.url === link) return;
  }

  urls.push({ url: link, expanded_url: link });
  entities.urls = urls;
}
459
459
+
460
460
+
/**
 * Heuristic check for a sponsored/promotional card.
 *
 * A card is treated as sponsored when it exposes more than one image, or when
 * its lower-cased name contains one of the promo keywords. Tweets without card
 * bindings are never sponsored.
 */
function detectSponsoredCard(tweet: Tweet): boolean {
  const card = tweet.card;
  if (!card?.binding_values) return false;

  const { imageUrls } = detectCardMedia(tweet);
  if (imageUrls.length > 1) return true;

  const cardName = card.name?.toLowerCase() || '';
  for (const keyword of ['promo', 'unified', 'carousel', 'collection', 'amplify']) {
    if (cardName.includes(keyword)) return true;
  }
  return false;
}
469
469
+
470
470
+
/**
 * Merges two media lists into one de-duplicated list capped at `limit` items.
 *
 * Ordering: within each list, entities not sourced from a card come before
 * card-sourced ones, and everything from `primary` precedes `secondary`.
 * Entities without `media_url_https` are skipped, and an entity whose
 * `media_url_https` was already taken is dropped.
 *
 * @param primary Media that takes precedence.
 * @param secondary Media appended after `primary`.
 * @param limit Maximum number of entities to return (default 4). A limit of
 *   zero or less yields an empty list.
 * @returns A new array; the input arrays are not modified.
 */
function mergeMediaEntities(primary: MediaEntity[], secondary: MediaEntity[], limit = 4): MediaEntity[] {
  const merged: MediaEntity[] = [];
  const seen = new Set<string>();
  const ordered = [
    ...primary.filter((media) => media?.source !== 'card'),
    ...primary.filter((media) => media?.source === 'card'),
    ...secondary.filter((media) => media?.source !== 'card'),
    ...secondary.filter((media) => media?.source === 'card'),
  ];

  for (const media of ordered) {
    // Check the cap before adding, so a non-positive limit never lets an item through.
    if (merged.length >= limit) break;
    const url = media?.media_url_https;
    if (!url || seen.has(url)) continue;
    merged.push(media);
    seen.add(url);
  }

  return merged;
}
490
490
+
491
491
+
/**
 * Lists every distinct http(s) URL found in the `string_value` of the tweet's
 * card bindings, in binding order. Returns an empty list when the tweet has no
 * card bindings.
 */
function detectCarouselLinks(tweet: Tweet): string[] {
  const bindingValues = tweet.card?.binding_values;
  if (!bindingValues) return [];

  const uniqueLinks = new Set<string>();
  for (const binding of Object.values(normalizeCardBindings(bindingValues))) {
    const candidate = binding?.string_value;
    if (isLikelyUrl(candidate)) uniqueLinks.add(candidate);
  }
  return [...uniqueLinks];
}
499
499
+
500
500
+
/**
 * Adds each of `links` to `entities.urls` unless an entry already has it as
 * `url` or `expanded_url`. Mutates `entities` in place.
 *
 * Does nothing when `entities` is missing or `links` is empty; otherwise
 * `entities.urls` is always (re)assigned, even if no link was new.
 */
function mergeUrlEntities(entities: TweetEntities | undefined, links: string[]) {
  if (!entities || links.length === 0) return;

  const urls = entities.urls || [];
  for (const link of links) {
    const alreadyListed = urls.some((entry) => entry.expanded_url === link || entry.url === link);
    if (alreadyListed) continue;
    urls.push({ url: link, expanded_url: link });
  }
  entities.urls = urls;
}
510
510
+
511
511
+
/**
 * Folds card images and links into the tweet's entities, mutating `tweet`.
 *
 * Card images are merged with the tweet's existing media (extended entities
 * first, else regular entities) and written to `extended_entities.media`;
 * `entities.media` is only filled when it was empty. The card link and every
 * URL found in the card bindings are then added to `entities.urls`.
 * Tweets without card bindings or without card images are left untouched.
 */
function injectCardMedia(tweet: Tweet) {
  if (!tweet.card?.binding_values) return;

  const { media: cardEntities, link: cardLink } = buildCardMediaEntities(tweet);
  if (cardEntities.length === 0) return;

  const currentMedia = tweet.extended_entities?.media || tweet.entities?.media || [];
  const combined = mergeMediaEntities(currentMedia, cardEntities);

  const extended = tweet.extended_entities || (tweet.extended_entities = {});
  extended.media = combined;

  const entities = tweet.entities || (tweet.entities = {});
  // Existing entity media is kept as-is; only an absent list is populated.
  if (!entities.media) entities.media = combined;

  if (cardLink) ensureUrlEntity(entities, cardLink);
  mergeUrlEntities(entities, detectCarouselLinks(tweet));
}
531
531
+
532
532
+
/**
 * Appends up to two card links to `text`, one per line as `Link: <url>`,
 * separated from the text by a blank line. The result is trimmed.
 *
 * The card's main link comes first, followed by the other URLs found in the
 * card bindings, without duplicates. The text is returned unchanged when the
 * tweet has no card bindings or no links.
 */
function ensureSponsoredLinks(text: string, tweet: Tweet): string {
  if (!tweet.card?.binding_values) return text;

  const uniqueLinks = new Set<string>();
  const cardLink = detectCardMedia(tweet).link;
  if (cardLink) uniqueLinks.add(cardLink);
  for (const link of detectCarouselLinks(tweet)) {
    if (link) uniqueLinks.add(link);
  }
  if (uniqueLinks.size === 0) return text;

  const linkLines = [...uniqueLinks].slice(0, 2).map((link) => `Link: ${link}`);
  return [text, linkLines.join('\n')].join('\n\n').trim();
}
543
543
+
544
544
+
/**
 * Returns `text` with leading and trailing whitespace removed.
 *
 * `trim()` alone covers what the earlier `/\s+$/` replace did, and it avoids
 * that pattern's quadratic backtracking on long whitespace runs that are not
 * at the end of the string.
 */
function addTextFallbacks(text: string): string {
  return text.trim();
}
547
547
+
548
548
+
/**
 * Best-effort lookup of a tweet's photo URLs.
 *
 * Two requests are made: the oEmbed endpoint is queried for the tweet URL
 * (with `twitter.com` rewritten to `x.com`), the tweet id is read from the
 * first `status/<digits>` occurrence in the returned HTML, and the `photos`
 * list is then fetched from the syndication endpoint for that id.
 *
 * Any failure (network error, timeout, unexpected payload) results in an
 * empty list; this function never rejects.
 *
 * @param tweetUrl Permanent URL of the tweet.
 * @returns `{ images }` with the photo URLs found, possibly empty.
 */
async function fetchSyndicationMedia(tweetUrl: string): Promise<{ images: string[] }> {
  // Bound both requests so an unresponsive endpoint cannot stall the caller;
  // same value as the file's other axios request that sets a timeout.
  const requestTimeoutMs = 10000;
  const userAgent = 'Mozilla/5.0';

  try {
    const normalized = tweetUrl.replace('twitter.com', 'x.com');
    const oembed = await axios.get('https://publish.twitter.com/oembed', {
      params: { url: normalized },
      headers: { 'User-Agent': userAgent },
      timeout: requestTimeoutMs,
    });

    const html: unknown = oembed.data?.html;
    if (typeof html !== 'string' || html.length === 0) return { images: [] };

    const tweetId = html.match(/status\/(\d+)/)?.[1];
    if (!tweetId) return { images: [] };

    const syndication = await axios.get(`https://cdn.syndication.twimg.com/tweet-result?id=${tweetId}`, {
      headers: { 'User-Agent': userAgent, Accept: 'application/json' },
      timeout: requestTimeoutMs,
    });

    const photos: unknown = syndication.data?.photos;
    if (!Array.isArray(photos)) return { images: [] };

    // Keep only real, non-empty string URLs rather than casting whatever the payload holds.
    const images = photos
      .map((photo) => photo?.url as unknown)
      .filter((url): url is string => typeof url === 'string' && url.length > 0);
    return { images };
  } catch {
    // Deliberate best-effort: callers treat "no images" and "lookup failed" the same way.
    return { images: [] };
  }
}
575
575
+
576
576
+
/**
 * Adds images obtained from the syndication lookup to the tweet's media,
 * mutating `tweet`.
 *
 * Up to four images become photo entities with the alt text
 * `Image from Twitter`; they are tagged `source: 'card'`, so the merge orders
 * them after the tweet's own media. The merged list is written to
 * `extended_entities.media`; `entities.media` is only filled when it was empty.
 * Does nothing when `syndication.images` is empty.
 */
function injectSyndicationMedia(tweet: Tweet, syndication: { images: string[] }) {
  const { images } = syndication;
  if (images.length === 0) return;

  const syndicated = images.slice(0, 4).map((url) => ({
    media_url_https: url,
    type: 'photo' as const,
    ext_alt_text: 'Image from Twitter',
    source: 'card' as const,
  }));

  const currentMedia = tweet.extended_entities?.media || tweet.entities?.media || [];
  const combined = mergeMediaEntities(currentMedia, syndicated);

  const extended = tweet.extended_entities || (tweet.extended_entities = {});
  extended.media = combined;

  const entities = tweet.entities || (tweet.entities = {});
  if (!entities.media) entities.media = combined;
}
306
593
307
594
function detectLanguage(text: string): string[] {
308
595
if (!text || text.trim().length === 0) return ['en'];
···
335
622
return (response.request as any)?.res?.responseUrl || shortUrl;
336
623
} catch (e: any) {
337
624
if (e.code === 'ERR_FR_TOO_MANY_REDIRECTS' || e.response?.status === 403 || e.response?.status === 401) {
338
338
-
// Silent fallback for common expansion issues (redirect loops, login walls)
339
339
-
return shortUrl;
625
625
+
// Silent fallback for common expansion issues (redirect loops, login walls)
626
626
+
return shortUrl;
340
627
}
341
628
return shortUrl;
342
629
}
···
372
659
const isGif = mimeType === 'image/gif';
373
660
const isAnimation = isGif || isWebp;
374
661
375
375
-
if ((buffer.length > MAX_SIZE && (mimeType.startsWith('image/') || mimeType === 'application/octet-stream')) || (isPng && buffer.length > MAX_SIZE)) {
662
662
+
if (
663
663
+
(buffer.length > MAX_SIZE && (mimeType.startsWith('image/') || mimeType === 'application/octet-stream')) ||
664
664
+
(isPng && buffer.length > MAX_SIZE)
665
665
+
) {
376
666
console.log(`[UPLOAD] ⚖️ Image too large (${(buffer.length / 1024).toFixed(2)} KB). Optimizing...`);
377
667
try {
378
668
let image = sharp(buffer);
···
386
676
while (currentBuffer.length > MAX_SIZE && attempts < 5) {
387
677
attempts++;
388
678
console.log(`[UPLOAD] 📉 Compression attempt ${attempts}: Width ${width}, Quality ${quality}...`);
389
389
-
679
679
+
390
680
if (isAnimation) {
391
391
-
// For animations (GIF/WebP), we can only do so much without losing frames
392
392
-
// Try to convert to WebP if it's a GIF, or optimize WebP
393
393
-
image = sharp(buffer, { animated: true });
394
394
-
if (isGif) {
395
395
-
// Convert GIF to WebP for better compression
396
396
-
image = image.webp({ quality: Math.max(quality, 50), effort: 6 });
397
397
-
finalMimeType = 'image/webp';
398
398
-
} else {
399
399
-
image = image.webp({ quality: Math.max(quality, 50), effort: 6 });
400
400
-
}
401
401
-
// Resize if really big
402
402
-
if (metadata.width && metadata.width > 800) {
403
403
-
image = image.resize({ width: 800, withoutEnlargement: true });
404
404
-
}
681
681
+
// For animations (GIF/WebP), we can only do so much without losing frames
682
682
+
// Try to convert to WebP if it's a GIF, or optimize WebP
683
683
+
image = sharp(buffer, { animated: true });
684
684
+
if (isGif) {
685
685
+
// Convert GIF to WebP for better compression
686
686
+
image = image.webp({ quality: Math.max(quality, 50), effort: 6 });
687
687
+
finalMimeType = 'image/webp';
688
688
+
} else {
689
689
+
image = image.webp({ quality: Math.max(quality, 50), effort: 6 });
690
690
+
}
691
691
+
// Resize if really big
692
692
+
if (metadata.width && metadata.width > 800) {
693
693
+
image = image.resize({ width: 800, withoutEnlargement: true });
694
694
+
}
405
695
} else {
406
406
-
// Static images
407
407
-
if (width > 1600) width = 1600;
408
408
-
else if (attempts > 1) width = Math.floor(width * 0.8);
409
409
-
410
410
-
quality = Math.max(50, quality - 10);
411
411
-
412
412
-
image = sharp(buffer)
413
413
-
.resize({ width, withoutEnlargement: true })
414
414
-
.jpeg({ quality, mozjpeg: true });
415
415
-
416
416
-
finalMimeType = 'image/jpeg';
696
696
+
// Static images
697
697
+
if (width > 1600) width = 1600;
698
698
+
else if (attempts > 1) width = Math.floor(width * 0.8);
699
699
+
700
700
+
quality = Math.max(50, quality - 10);
701
701
+
702
702
+
image = sharp(buffer).resize({ width, withoutEnlargement: true }).jpeg({ quality, mozjpeg: true });
703
703
+
704
704
+
finalMimeType = 'image/jpeg';
417
705
}
418
418
-
706
706
+
419
707
currentBuffer = await image.toBuffer();
420
708
if (currentBuffer.length <= MAX_SIZE) {
421
421
-
finalBuffer = currentBuffer;
422
422
-
console.log(`[UPLOAD] ✅ Optimized to ${(finalBuffer.length / 1024).toFixed(2)} KB`);
423
423
-
break;
709
709
+
finalBuffer = currentBuffer;
710
710
+
console.log(`[UPLOAD] ✅ Optimized to ${(finalBuffer.length / 1024).toFixed(2)} KB`);
711
711
+
break;
424
712
}
425
713
}
426
426
-
714
714
+
427
715
if (finalBuffer.length > MAX_SIZE) {
428
428
-
console.warn(`[UPLOAD] ⚠️ Could not compress below limit. Current: ${(finalBuffer.length / 1024).toFixed(2)} KB. Upload might fail.`);
716
716
+
console.warn(
717
717
+
`[UPLOAD] ⚠️ Could not compress below limit. Current: ${(finalBuffer.length / 1024).toFixed(2)} KB. Upload might fail.`,
718
718
+
);
429
719
}
430
430
-
431
720
} catch (err) {
432
721
console.warn(`[UPLOAD] ⚠️ Optimization failed, attempting original upload:`, (err as Error).message);
433
722
finalBuffer = buffer;
···
455
744
'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
456
745
];
457
746
458
458
-
const executablePath = browserPaths.find(p => fs.existsSync(p));
459
459
-
747
747
+
const executablePath = browserPaths.find((p) => fs.existsSync(p));
748
748
+
460
749
if (!executablePath) {
461
750
console.warn(`[SCREENSHOT] ⏩ Skipping screenshot (no Chrome/Chromium found at common paths).`);
462
751
return null;
···
500
789
`;
501
790
502
791
await page.setContent(html, { waitUntil: 'networkidle0' });
503
503
-
792
792
+
504
793
// Wait for the twitter iframe to load and render
505
794
try {
506
795
await page.waitForSelector('iframe', { timeout: 10000 });
507
796
// Small extra wait for images inside iframe
508
508
-
await new Promise(r => setTimeout(r, 2000));
797
797
+
await new Promise((r) => setTimeout(r, 2000));
509
798
} catch (e) {
510
799
console.warn(`[SCREENSHOT] ⚠️ Timeout waiting for tweet iframe, taking screenshot anyway.`);
511
800
}
···
515
804
const box = await element.boundingBox();
516
805
const buffer = await element.screenshot({ type: 'png', omitBackground: true });
517
806
if (box) {
518
518
-
console.log(`[SCREENSHOT] ✅ Captured successfully (${(buffer.length / 1024).toFixed(2)} KB) - ${Math.round(box.width)}x${Math.round(box.height)}`);
519
519
-
return { buffer: buffer as Buffer, width: Math.round(box.width), height: Math.round(box.height) };
807
807
+
console.log(
808
808
+
`[SCREENSHOT] ✅ Captured successfully (${(buffer.length / 1024).toFixed(2)} KB) - ${Math.round(box.width)}x${Math.round(box.height)}`,
809
809
+
);
810
810
+
return { buffer: buffer as Buffer, width: Math.round(box.width), height: Math.round(box.height) };
520
811
}
521
812
}
522
813
} catch (err) {
···
534
825
535
826
while (!blob) {
536
827
attempts++;
537
537
-
const statusUrl = new URL("https://video.bsky.app/xrpc/app.bsky.video.getJobStatus");
538
538
-
statusUrl.searchParams.append("jobId", jobId);
828
828
+
const statusUrl = new URL('https://video.bsky.app/xrpc/app.bsky.video.getJobStatus');
829
829
+
statusUrl.searchParams.append('jobId', jobId);
539
830
540
831
const statusResponse = await fetch(statusUrl);
541
832
if (!statusResponse.ok) {
···
553
844
if (statusData.jobStatus.blob) {
554
845
blob = statusData.jobStatus.blob;
555
846
console.log(`[VIDEO] 🎉 Video processing complete! Blob ref obtained.`);
556
556
-
} else if (state === "JOB_STATE_FAILED") {
557
557
-
throw new Error(`Video processing failed: ${statusData.jobStatus.error || "Unknown error"}`);
847
847
+
} else if (state === 'JOB_STATE_FAILED') {
848
848
+
throw new Error(`Video processing failed: ${statusData.jobStatus.error || 'Unknown error'}`);
558
849
} else {
559
850
// Wait before next poll
560
851
await new Promise((resolve) => setTimeout(resolve, 5000));
···
562
853
563
854
if (attempts > 60) {
564
855
// ~5 minute timeout
565
565
-
throw new Error("Video processing timed out after 5 minutes.");
856
856
+
throw new Error('Video processing timed out after 5 minutes.');
566
857
}
567
858
}
568
859
return blob!;
···
572
863
try {
573
864
const response = await axios.get(url, {
574
865
headers: {
575
575
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
576
576
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
866
866
+
'User-Agent':
867
867
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
868
868
+
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
577
869
'Accept-Language': 'en-US,en;q=0.9',
578
870
},
579
871
timeout: 10000,
580
872
maxRedirects: 5,
581
873
});
582
582
-
874
874
+
583
875
const $ = cheerio.load(response.data);
584
876
const title = $('meta[property="og:title"]').attr('content') || $('title').text() || '';
585
585
-
const description = $('meta[property="og:description"]').attr('content') || $('meta[name="description"]').attr('content') || '';
877
877
+
const description =
878
878
+
$('meta[property="og:description"]').attr('content') || $('meta[name="description"]').attr('content') || '';
586
879
let thumbBlob: BlobRef | undefined;
587
880
588
881
let imageUrl = $('meta[property="og:image"]').attr('content');
589
882
if (imageUrl) {
590
590
-
if (!imageUrl.startsWith('http')) {
591
591
-
const baseUrl = new URL(url);
592
592
-
imageUrl = new URL(imageUrl, baseUrl.origin).toString();
593
593
-
}
594
594
-
try {
595
595
-
const { buffer, mimeType } = await downloadMedia(imageUrl);
596
596
-
thumbBlob = await uploadToBluesky(agent, buffer, mimeType);
597
597
-
} catch (e) {
598
598
-
// SIlently fail thumbnail upload
599
599
-
}
883
883
+
if (!imageUrl.startsWith('http')) {
884
884
+
const baseUrl = new URL(url);
885
885
+
imageUrl = new URL(imageUrl, baseUrl.origin).toString();
886
886
+
}
887
887
+
try {
888
888
+
const { buffer, mimeType } = await downloadMedia(imageUrl);
889
889
+
thumbBlob = await uploadToBluesky(agent, buffer, mimeType);
890
890
+
} catch (e) {
891
891
+
// Silently fail thumbnail upload
892
892
+
}
600
893
}
601
894
602
895
if (!title && !description) return null;
603
896
604
897
const external: any = {
605
605
-
uri: url,
606
606
-
title: title || url,
607
607
-
description: description,
898
898
+
uri: url,
899
899
+
title: title || url,
900
900
+
description: description,
608
901
};
609
902
610
903
if (thumbBlob) {
611
611
-
external.thumb = thumbBlob;
904
904
+
external.thumb = thumbBlob;
612
905
}
613
906
614
907
return {
615
615
-
$type: 'app.bsky.embed.external',
616
616
-
external,
908
908
+
$type: 'app.bsky.embed.external',
909
909
+
external,
617
910
};
618
618
-
619
911
} catch (err: any) {
620
912
if (err.code === 'ERR_FR_TOO_MANY_REDIRECTS') {
621
621
-
// Ignore redirect loops
622
622
-
return null;
913
913
+
// Ignore redirect loops
914
914
+
return null;
623
915
}
624
916
console.warn(`Failed to fetch embed card for ${url}:`, err.message || err);
625
917
return null;
···
627
919
}
628
920
629
921
async function uploadVideoToBluesky(agent: BskyAgent, buffer: Buffer, filename: string): Promise<BlobRef> {
630
630
-
const sanitizedFilename = filename.split("?")[0] || "video.mp4";
922
922
+
const sanitizedFilename = filename.split('?')[0] || 'video.mp4';
631
923
console.log(
632
924
`[VIDEO] 🟢 Starting upload process for ${sanitizedFilename} (${(buffer.length / 1024 / 1024).toFixed(2)} MB)`,
633
925
);
···
640
932
641
933
// didDoc might be present in repoDesc
642
934
const pdsService = (repoDesc as any).didDoc?.service?.find(
643
643
-
(s: any) => s.id === "#atproto_pds" || s.type === "AtProtoPds",
935
935
+
(s: any) => s.id === '#atproto_pds' || s.type === 'AtProtoPds',
644
936
);
645
937
const pdsUrl = pdsService?.serviceEndpoint;
646
646
-
const pdsHost = pdsUrl ? new URL(pdsUrl).host : "bsky.social";
938
938
+
const pdsHost = pdsUrl ? new URL(pdsUrl).host : 'bsky.social';
647
939
648
940
console.log(`[VIDEO] 🌐 PDS Host detected: ${pdsHost}`);
649
941
console.log(`[VIDEO] 🔑 Requesting service auth token for audience: did:web:${pdsHost}...`);
650
942
651
943
const { data: serviceAuth } = await agent.com.atproto.server.getServiceAuth({
652
944
aud: `did:web:${pdsHost}`,
653
653
-
lxm: "com.atproto.repo.uploadBlob",
945
945
+
lxm: 'com.atproto.repo.uploadBlob',
654
946
exp: Math.floor(Date.now() / 1000) + 60 * 30,
655
947
});
656
948
console.log(`[VIDEO] ✅ Service auth token obtained.`);
···
658
950
const token = serviceAuth.token;
659
951
660
952
// 2. Upload to Video Service
661
661
-
const uploadUrl = new URL("https://video.bsky.app/xrpc/app.bsky.video.uploadVideo");
662
662
-
uploadUrl.searchParams.append("did", agent.session!.did!);
663
663
-
uploadUrl.searchParams.append("name", sanitizedFilename);
953
953
+
const uploadUrl = new URL('https://video.bsky.app/xrpc/app.bsky.video.uploadVideo');
954
954
+
uploadUrl.searchParams.append('did', agent.session!.did!);
955
955
+
uploadUrl.searchParams.append('name', sanitizedFilename);
664
956
665
957
console.log(`[VIDEO] 📤 Uploading to ${uploadUrl.href}...`);
666
958
const uploadResponse = await fetch(uploadUrl, {
667
667
-
method: "POST",
959
959
+
method: 'POST',
668
960
headers: {
669
961
Authorization: `Bearer ${token}`,
670
670
-
"Content-Type": "video/mp4",
962
962
+
'Content-Type': 'video/mp4',
671
963
},
672
964
body: new Blob([new Uint8Array(buffer)]),
673
965
});
···
678
970
// Handle specific error cases
679
971
try {
680
972
const errorJson = JSON.parse(errorText);
681
681
-
973
973
+
682
974
// Handle server overload gracefully
683
683
-
if (uploadResponse.status === 503 || errorJson.error === "Server does not have enough capacity to handle uploads") {
684
684
-
console.warn(`[VIDEO] ⚠️ Server overloaded (503). Skipping video upload and falling back to link.`);
685
685
-
throw new Error("VIDEO_FALLBACK_503");
975
975
+
if (
976
976
+
uploadResponse.status === 503 ||
977
977
+
errorJson.error === 'Server does not have enough capacity to handle uploads'
978
978
+
) {
979
979
+
console.warn(`[VIDEO] ⚠️ Server overloaded (503). Skipping video upload and falling back to link.`);
980
980
+
throw new Error('VIDEO_FALLBACK_503');
686
981
}
687
982
688
688
-
if (errorJson.error === "already_exists" && errorJson.jobId) {
983
983
+
if (errorJson.error === 'already_exists' && errorJson.jobId) {
689
984
console.log(`[VIDEO] ♻️ Video already exists. Resuming with Job ID: ${errorJson.jobId}`);
690
985
return await pollForVideoProcessing(agent, errorJson.jobId);
691
986
}
692
692
-
if (errorJson.error === "unconfirmed_email" || (errorJson.jobStatus && errorJson.jobStatus.error === "unconfirmed_email")) {
693
693
-
console.error(`[VIDEO] 🛑 BLUESKY ERROR: Your email is unconfirmed. You MUST verify your email on Bluesky to upload videos.`);
694
694
-
throw new Error("Bluesky Email Unconfirmed - Video Upload Rejected");
987
987
+
if (
988
988
+
errorJson.error === 'unconfirmed_email' ||
989
989
+
(errorJson.jobStatus && errorJson.jobStatus.error === 'unconfirmed_email')
990
990
+
) {
991
991
+
console.error(
992
992
+
`[VIDEO] 🛑 BLUESKY ERROR: Your email is unconfirmed. You MUST verify your email on Bluesky to upload videos.`,
993
993
+
);
994
994
+
throw new Error('Bluesky Email Unconfirmed - Video Upload Rejected');
695
995
}
696
996
} catch (e) {
697
697
-
if ((e as Error).message === "VIDEO_FALLBACK_503") throw e;
698
698
-
// Not JSON or missing fields, proceed with throwing original error
997
997
+
if ((e as Error).message === 'VIDEO_FALLBACK_503') throw e;
998
998
+
// Not JSON or missing fields, proceed with throwing original error
699
999
}
700
700
-
1000
1000
+
701
1001
console.error(`[VIDEO] ❌ Server responded with ${uploadResponse.status}: ${errorText}`);
702
1002
throw new Error(`Video upload failed: ${uploadResponse.status} ${errorText}`);
703
1003
}
···
740
1040
// 4. Force split
741
1041
742
1042
let splitIndex = -1;
743
743
-
1043
1043
+
744
1044
// Check paragraphs
745
1045
let checkIndex = remaining.lastIndexOf('\n\n', effectiveLimit);
746
1046
if (checkIndex !== -1) splitIndex = checkIndex;
747
1047
748
1048
// Check sentences
749
1049
if (splitIndex === -1) {
750
750
-
// Look for punctuation followed by space
751
751
-
const sentenceMatches = Array.from(remaining.substring(0, effectiveLimit).matchAll(/[.!?]\s/g));
752
752
-
if (sentenceMatches.length > 0) {
753
753
-
const lastMatch = sentenceMatches[sentenceMatches.length - 1];
754
754
-
if (lastMatch && lastMatch.index !== undefined) {
755
755
-
splitIndex = lastMatch.index + 1; // Include punctuation
756
756
-
}
1050
1050
+
// Look for punctuation followed by space
1051
1051
+
const sentenceMatches = Array.from(remaining.substring(0, effectiveLimit).matchAll(/[.!?]\s/g));
1052
1052
+
if (sentenceMatches.length > 0) {
1053
1053
+
const lastMatch = sentenceMatches[sentenceMatches.length - 1];
1054
1054
+
if (lastMatch && lastMatch.index !== undefined) {
1055
1055
+
splitIndex = lastMatch.index + 1; // Include punctuation
757
1056
}
1057
1057
+
}
758
1058
}
759
1059
760
1060
// Check spaces
761
1061
if (splitIndex === -1) {
762
762
-
checkIndex = remaining.lastIndexOf(' ', effectiveLimit);
763
763
-
if (checkIndex !== -1) splitIndex = checkIndex;
1062
1062
+
checkIndex = remaining.lastIndexOf(' ', effectiveLimit);
1063
1063
+
if (checkIndex !== -1) splitIndex = checkIndex;
764
1064
}
765
1065
766
1066
// Force split if no good break point found
767
1067
if (splitIndex === -1) {
768
768
-
splitIndex = effectiveLimit;
1068
1068
+
splitIndex = effectiveLimit;
769
1069
}
770
1070
771
1071
chunks.push(remaining.substring(0, splitIndex).trim());
···
814
1114
async function fetchUserTweets(username: string, limit: number, processedIds?: Set<string>): Promise<Tweet[]> {
815
1115
const client = await getTwitterScraper();
816
1116
if (!client) return [];
817
817
-
1117
1117
+
818
1118
let retries = 3;
819
1119
while (retries > 0) {
820
1120
try {
821
1121
const tweets: Tweet[] = [];
822
1122
const generator = client.getTweets(username, limit);
823
1123
let consecutiveProcessedCount = 0;
824
824
-
1124
1124
+
825
1125
for await (const t of generator) {
826
1126
const tweet = mapScraperTweetToLocalTweet(t);
827
1127
const tweetId = tweet.id_str || tweet.id;
828
828
-
1128
1128
+
829
1129
// Early stopping logic: if we see 3 consecutive tweets we've already processed, stop.
830
1130
// This assumes timeline order (mostly true).
831
1131
if (processedIds && tweetId && processedIds.has(tweetId)) {
832
832
-
consecutiveProcessedCount++;
833
833
-
if (consecutiveProcessedCount >= 3) {
834
834
-
console.log(`[${username}] 🛑 Found 3 consecutive processed tweets. Stopping fetch early.`);
835
835
-
break;
836
836
-
}
1132
1132
+
consecutiveProcessedCount++;
1133
1133
+
if (consecutiveProcessedCount >= 3) {
1134
1134
+
console.log(`[${username}] 🛑 Found 3 consecutive processed tweets. Stopping fetch early.`);
1135
1135
+
break;
1136
1136
+
}
837
1137
} else {
838
838
-
consecutiveProcessedCount = 0;
1138
1138
+
consecutiveProcessedCount = 0;
839
1139
}
840
1140
841
1141
tweets.push(tweet);
···
844
1144
return tweets;
845
1145
} catch (e: any) {
846
1146
retries--;
847
847
-
const isRetryable = e.message?.includes('ServiceUnavailable') || e.message?.includes('Timeout') || e.message?.includes('429') || e.message?.includes('401');
848
848
-
1147
1147
+
const isRetryable =
1148
1148
+
e.message?.includes('ServiceUnavailable') ||
1149
1149
+
e.message?.includes('Timeout') ||
1150
1150
+
e.message?.includes('429') ||
1151
1151
+
e.message?.includes('401');
1152
1152
+
849
1153
// Check for Twitter Internal Server Error (often returns 400 with specific body)
850
1154
if (e?.response?.status === 400 && JSON.stringify(e?.response?.data || {}).includes('InternalServerError')) {
851
851
-
console.warn(`⚠️ Twitter Internal Server Error (Transient) for ${username}.`);
852
852
-
// Treat as retryable
853
853
-
if (retries > 0) {
854
854
-
await new Promise(r => setTimeout(r, 5000));
855
855
-
continue;
856
856
-
}
1155
1155
+
console.warn(`⚠️ Twitter Internal Server Error (Transient) for ${username}.`);
1156
1156
+
// Treat as retryable
1157
1157
+
if (retries > 0) {
1158
1158
+
await new Promise((r) => setTimeout(r, 5000));
1159
1159
+
continue;
1160
1160
+
}
857
1161
}
858
1162
859
1163
if (isRetryable) {
860
1164
console.warn(`⚠️ Error fetching tweets for ${username} (${e.message}).`);
861
861
-
1165
1165
+
862
1166
// Attempt credential switch if we have backups
863
1167
if (await switchCredentials()) {
864
864
-
console.log(`🔄 Retrying with new credentials...`);
865
865
-
continue; // Retry loop with new credentials
1168
1168
+
console.log(`🔄 Retrying with new credentials...`);
1169
1169
+
continue; // Retry loop with new credentials
866
1170
}
867
1171
868
1172
if (retries > 0) {
869
869
-
console.log(`Waiting 5s before retry...`);
870
870
-
await new Promise(r => setTimeout(r, 5000));
871
871
-
continue;
1173
1173
+
console.log(`Waiting 5s before retry...`);
1174
1174
+
await new Promise((r) => setTimeout(r, 5000));
1175
1175
+
continue;
872
1176
}
873
1177
}
874
874
-
1178
1178
+
875
1179
console.warn(`Error fetching tweets for ${username}:`, e.message || e);
876
1180
return [];
877
1181
}
878
1182
}
879
879
-
1183
1183
+
880
1184
console.log(`[${username}] ⚠️ Scraper returned 0 tweets (or failed silently) after retries.`);
881
1185
return [];
882
1186
}
883
1187
884
884
-
// ============================================================================
1188
1188
+
// ============================================================================
885
1189
// Main Processing Logic
886
886
-
// ============================================================================
1190
1190
+
// ============================================================================
887
1191
888
888
-
// ============================================================================
1192
1192
+
// ============================================================================
889
1193
// Main Processing Logic
890
890
-
// ============================================================================
1194
1194
+
// ============================================================================
891
1195
892
1196
async function processTweets(
893
1197
agent: BskyAgent,
···
909
1213
});
910
1214
911
1215
const processedTweets = loadProcessedTweets(bskyIdentifier);
912
912
-
1216
1216
+
913
1217
// Maintain a local map that updates in real-time for intra-batch replies
914
1218
const localProcessedMap: ProcessedTweetsMap = { ...processedTweets };
915
1219
···
936
1240
if (isRetweet) {
937
1241
console.log(`[${twitterUsername}] ⏩ Skipping retweet ${tweetId}.`);
938
1242
if (!dryRun) {
939
939
-
// Save as skipped so we don't check it again
940
940
-
saveProcessedTweet(twitterUsername, bskyIdentifier, tweetId, { skipped: true, text: tweet.text });
941
941
-
localProcessedMap[tweetId] = { skipped: true, text: tweet.text };
1243
1243
+
// Save as skipped so we don't check it again
1244
1244
+
saveProcessedTweet(twitterUsername, bskyIdentifier, tweetId, { skipped: true, text: tweet.text });
1245
1245
+
localProcessedMap[tweetId] = { skipped: true, text: tweet.text };
942
1246
}
943
1247
continue;
944
1248
}
···
967
1271
// Parent missing from local batch/DB. Attempt to fetch it if it's a self-thread.
968
1272
// We assume it's a self-thread if we don't have it, but we'll verify author after fetch.
969
1273
console.log(`[${twitterUsername}] 🕵️ Parent ${replyStatusId} missing. Checking if backfillable...`);
970
970
-
1274
1274
+
971
1275
let parentBackfilled = false;
972
1276
try {
973
973
-
const scraper = await getTwitterScraper();
974
974
-
if (scraper) {
975
975
-
const parentRaw = await scraper.getTweet(replyStatusId);
976
976
-
if (parentRaw) {
977
977
-
const parentTweet = mapScraperTweetToLocalTweet(parentRaw);
978
978
-
const parentAuthor = parentTweet.user?.screen_name;
979
979
-
980
980
-
if (parentAuthor?.toLowerCase() === twitterUsername.toLowerCase()) {
981
981
-
console.log(`[${twitterUsername}] 🔄 Parent is ours (@${parentAuthor}). Backfilling parent first...`);
982
982
-
// Recursively process the parent
983
983
-
await processTweets(agent, twitterUsername, bskyIdentifier, [parentTweet], dryRun);
984
984
-
985
985
-
// Check if it was saved
986
986
-
const savedParent = dbService.getTweet(replyStatusId, bskyIdentifier);
987
987
-
if (savedParent && savedParent.status === 'migrated') {
988
988
-
// Update local map
989
989
-
localProcessedMap[replyStatusId] = {
990
990
-
uri: savedParent.bsky_uri,
991
991
-
cid: savedParent.bsky_cid,
992
992
-
root: (savedParent.bsky_root_uri && savedParent.bsky_root_cid) ? { uri: savedParent.bsky_root_uri, cid: savedParent.bsky_root_cid } : undefined,
993
993
-
tail: (savedParent.bsky_tail_uri && savedParent.bsky_tail_cid) ? { uri: savedParent.bsky_tail_uri, cid: savedParent.bsky_tail_cid } : undefined,
994
994
-
migrated: true
995
995
-
};
996
996
-
replyParentInfo = localProcessedMap[replyStatusId] ?? null;
997
997
-
parentBackfilled = true;
998
998
-
console.log(`[${twitterUsername}] ✅ Parent backfilled. Resuming thread.`);
999
999
-
}
1000
1000
-
} else {
1001
1001
-
console.log(`[${twitterUsername}] ⏩ Parent is by @${parentAuthor}. Skipping external reply.`);
1002
1002
-
}
1277
1277
+
const scraper = await getTwitterScraper();
1278
1278
+
if (scraper) {
1279
1279
+
const parentRaw = await scraper.getTweet(replyStatusId);
1280
1280
+
if (parentRaw) {
1281
1281
+
const parentTweet = mapScraperTweetToLocalTweet(parentRaw);
1282
1282
+
const parentAuthor = parentTweet.user?.screen_name;
1283
1283
+
1284
1284
+
if (parentAuthor?.toLowerCase() === twitterUsername.toLowerCase()) {
1285
1285
+
console.log(`[${twitterUsername}] 🔄 Parent is ours (@${parentAuthor}). Backfilling parent first...`);
1286
1286
+
// Recursively process the parent
1287
1287
+
await processTweets(agent, twitterUsername, bskyIdentifier, [parentTweet], dryRun);
1288
1288
+
1289
1289
+
// Check if it was saved
1290
1290
+
const savedParent = dbService.getTweet(replyStatusId, bskyIdentifier);
1291
1291
+
if (savedParent && savedParent.status === 'migrated') {
1292
1292
+
// Update local map
1293
1293
+
localProcessedMap[replyStatusId] = {
1294
1294
+
uri: savedParent.bsky_uri,
1295
1295
+
cid: savedParent.bsky_cid,
1296
1296
+
root:
1297
1297
+
savedParent.bsky_root_uri && savedParent.bsky_root_cid
1298
1298
+
? { uri: savedParent.bsky_root_uri, cid: savedParent.bsky_root_cid }
1299
1299
+
: undefined,
1300
1300
+
tail:
1301
1301
+
savedParent.bsky_tail_uri && savedParent.bsky_tail_cid
1302
1302
+
? { uri: savedParent.bsky_tail_uri, cid: savedParent.bsky_tail_cid }
1303
1303
+
: undefined,
1304
1304
+
migrated: true,
1305
1305
+
};
1306
1306
+
replyParentInfo = localProcessedMap[replyStatusId] ?? null;
1307
1307
+
parentBackfilled = true;
1308
1308
+
console.log(`[${twitterUsername}] ✅ Parent backfilled. Resuming thread.`);
1003
1309
}
1310
1310
+
} else {
1311
1311
+
console.log(`[${twitterUsername}] ⏩ Parent is by @${parentAuthor}. Skipping external reply.`);
1312
1312
+
}
1004
1313
}
1314
1314
+
}
1005
1315
} catch (e) {
1006
1006
-
console.warn(`[${twitterUsername}] ⚠️ Failed to fetch/backfill parent ${replyStatusId}:`, e);
1316
1316
+
console.warn(`[${twitterUsername}] ⚠️ Failed to fetch/backfill parent ${replyStatusId}:`, e);
1007
1317
}
1008
1318
1009
1319
if (!parentBackfilled) {
1010
1010
-
console.log(`[${twitterUsername}] ⏩ Skipping external/unknown reply (Parent not found or external).`);
1011
1011
-
if (!dryRun) {
1012
1012
-
saveProcessedTweet(twitterUsername, bskyIdentifier, tweetId, { skipped: true, text: tweetText });
1013
1013
-
localProcessedMap[tweetId] = { skipped: true, text: tweetText };
1014
1014
-
}
1015
1015
-
continue;
1320
1320
+
console.log(`[${twitterUsername}] ⏩ Skipping external/unknown reply (Parent not found or external).`);
1321
1321
+
if (!dryRun) {
1322
1322
+
saveProcessedTweet(twitterUsername, bskyIdentifier, tweetId, { skipped: true, text: tweetText });
1323
1323
+
localProcessedMap[tweetId] = { skipped: true, text: tweetText };
1324
1324
+
}
1325
1325
+
continue;
1016
1326
}
1017
1327
} else {
1018
1328
console.log(`[${twitterUsername}] ⏩ Skipping external/unknown reply.`);
···
1025
1335
}
1026
1336
1027
1337
// Removed early dryRun continue to allow verifying logic
1028
1028
-
1338
1338
+
1029
1339
let text = tweetText
1030
1340
.replace(/&amp;/g, '&')
1031
1341
.replace(/&lt;/g, '<')
1032
1342
.replace(/&gt;/g, '>')
1033
1343
.replace(/&quot;/g, '"')
1034
1344
.replace(/&#39;/g, "'");
1035
1035
-
1345
1345
+
1036
1346
// 1. Link Expansion
1037
1347
console.log(`[${twitterUsername}] 🔗 Expanding links...`);
1038
1348
const urls = tweet.entities?.urls || [];
···
1047
1357
const matches = text.match(tcoRegex) || [];
1048
1358
for (const tco of matches) {
1049
1359
// Avoid re-resolving if we already handled it via entities
1050
1050
-
if (urls.some(u => u.url === tco)) continue;
1360
1360
+
if (urls.some((u) => u.url === tco)) continue;
1051
1361
1052
1362
console.log(`[${twitterUsername}] 🔍 Resolving fallback link: ${tco}`);
1053
1363
const resolved = await expandUrl(tco);
1054
1364
if (resolved !== tco) {
1055
1055
-
text = text.replace(tco, resolved);
1056
1056
-
// Add to urls array so it can be used for card embedding later
1057
1057
-
urls.push({ url: tco, expanded_url: resolved });
1365
1365
+
text = text.replace(tco, resolved);
1366
1366
+
// Add to urls array so it can be used for card embedding later
1367
1367
+
urls.push({ url: tco, expanded_url: resolved });
1368
1368
+
}
1369
1369
+
}
1370
1370
+
1371
1371
+
const isSponsoredCard = detectSponsoredCard(tweet);
1372
1372
+
if (isSponsoredCard) {
1373
1373
+
console.log(`[${twitterUsername}] 🧩 Sponsored/card payload detected. Extracting carousel media...`);
1374
1374
+
injectCardMedia(tweet);
1375
1375
+
} else if (tweet.permanentUrl) {
1376
1376
+
const syndication = await fetchSyndicationMedia(tweet.permanentUrl);
1377
1377
+
if (syndication.images.length > 0) {
1378
1378
+
console.log(`[${twitterUsername}] 🧩 Syndication carousel detected. Extracting media...`);
1379
1379
+
injectSyndicationMedia(tweet, syndication);
1058
1380
}
1059
1381
}
1060
1382
···
1072
1394
mediaLinksToRemove.push(media.url);
1073
1395
if (media.expanded_url) mediaLinksToRemove.push(media.expanded_url);
1074
1396
}
1075
1075
-
1397
1397
+
if (media.source === 'card' && media.media_url_https) {
1398
1398
+
mediaLinksToRemove.push(media.media_url_https);
1399
1399
+
}
1400
1400
+
1076
1401
let aspectRatio: AspectRatio | undefined;
1077
1402
if (media.sizes?.large) {
1078
1403
aspectRatio = { width: media.sizes.large.w, height: media.sizes.large.h };
···
1088
1413
console.log(`[${twitterUsername}] 📥 Downloading image (high quality): ${path.basename(highQualityUrl)}`);
1089
1414
updateAppStatus({ message: `Downloading high quality image...` });
1090
1415
const { buffer, mimeType } = await downloadMedia(highQualityUrl);
1091
1091
-
1416
1416
+
1092
1417
let blob: BlobRef;
1093
1418
if (dryRun) {
1094
1094
-
console.log(`[${twitterUsername}] 🧪 [DRY RUN] Would upload image (${(buffer.length/1024).toFixed(2)} KB)`);
1095
1095
-
blob = { ref: { toString: () => 'mock-blob' }, mimeType, size: buffer.length } as any;
1419
1419
+
console.log(
1420
1420
+
`[${twitterUsername}] 🧪 [DRY RUN] Would upload image (${(buffer.length / 1024).toFixed(2)} KB)`,
1421
1421
+
);
1422
1422
+
blob = { ref: { toString: () => 'mock-blob' }, mimeType, size: buffer.length } as any;
1096
1423
} else {
1097
1097
-
console.log(`[${twitterUsername}] 📤 Uploading image to Bluesky...`);
1098
1098
-
updateAppStatus({ message: `Uploading image to Bluesky...` });
1099
1099
-
blob = await uploadToBluesky(agent, buffer, mimeType);
1424
1424
+
console.log(`[${twitterUsername}] 📤 Uploading image to Bluesky...`);
1425
1425
+
updateAppStatus({ message: `Uploading image to Bluesky...` });
1426
1426
+
blob = await uploadToBluesky(agent, buffer, mimeType);
1100
1427
}
1101
1101
-
1428
1428
+
1102
1429
let altText = media.ext_alt_text;
1103
1430
if (!altText) {
1104
1104
-
console.log(`[${twitterUsername}] 🤖 Generating alt text via Gemini...`);
1105
1105
-
// Use original tweet text for context, not the modified/cleaned one
1106
1106
-
altText = await generateAltText(buffer, mimeType, tweetText);
1107
1107
-
if (altText) console.log(`[${twitterUsername}] ✅ Alt text generated: ${altText.substring(0, 50)}...`);
1431
1431
+
console.log(`[${twitterUsername}] 🤖 Generating alt text via Gemini...`);
1432
1432
+
// Use original tweet text for context, not the modified/cleaned one
1433
1433
+
altText = await generateAltText(buffer, mimeType, tweetText);
1434
1434
+
if (altText) console.log(`[${twitterUsername}] ✅ Alt text generated: ${altText.substring(0, 50)}...`);
1108
1435
}
1109
1436
1110
1437
images.push({ alt: altText || 'Image from Twitter', image: blob, aspectRatio });
···
1125
1452
} else if (media.type === 'video' || media.type === 'animated_gif') {
1126
1453
const variants = media.video_info?.variants || [];
1127
1454
const duration = media.video_info?.duration_millis || 0;
1128
1128
-
1129
1129
-
if (duration > 180000) { // 3 minutes
1130
1130
-
console.warn(`[${twitterUsername}] ⚠️ Video too long (${(duration / 1000).toFixed(1)}s). Fallback to link.`);
1131
1131
-
const tweetUrl = `https://twitter.com/${twitterUsername}/status/${tweetId}`;
1132
1132
-
if (!text.includes(tweetUrl)) text += `\n\nVideo: ${tweetUrl}`;
1133
1133
-
continue;
1455
1455
+
1456
1456
+
if (duration > 180000) {
1457
1457
+
// 3 minutes
1458
1458
+
console.warn(`[${twitterUsername}] ⚠️ Video too long (${(duration / 1000).toFixed(1)}s). Fallback to link.`);
1459
1459
+
const tweetUrl = `https://twitter.com/${twitterUsername}/status/${tweetId}`;
1460
1460
+
if (!text.includes(tweetUrl)) text += `\n\nVideo: ${tweetUrl}`;
1461
1461
+
continue;
1134
1462
}
1135
1463
1136
1464
const mp4s = variants
···
1145
1473
console.log(`[${twitterUsername}] 📥 Downloading video: ${videoUrl}`);
1146
1474
updateAppStatus({ message: `Downloading video: ${path.basename(videoUrl)}` });
1147
1475
const { buffer, mimeType } = await downloadMedia(videoUrl);
1148
1148
-
1476
1476
+
1149
1477
if (buffer.length <= 90 * 1024 * 1024) {
1150
1478
const filename = videoUrl.split('/').pop() || 'video.mp4';
1151
1479
if (dryRun) {
1152
1152
-
console.log(`[${twitterUsername}] 🧪 [DRY RUN] Would upload video: ${filename} (${(buffer.length/1024/1024).toFixed(2)} MB)`);
1153
1153
-
videoBlob = { ref: { toString: () => 'mock-video-blob' }, mimeType: 'video/mp4', size: buffer.length } as any;
1480
1480
+
console.log(
1481
1481
+
`[${twitterUsername}] 🧪 [DRY RUN] Would upload video: ${filename} (${(buffer.length / 1024 / 1024).toFixed(2)} MB)`,
1482
1482
+
);
1483
1483
+
videoBlob = {
1484
1484
+
ref: { toString: () => 'mock-video-blob' },
1485
1485
+
mimeType: 'video/mp4',
1486
1486
+
size: buffer.length,
1487
1487
+
} as any;
1154
1488
} else {
1155
1155
-
updateAppStatus({ message: `Uploading video to Bluesky...` });
1156
1156
-
videoBlob = await uploadVideoToBluesky(agent, buffer, filename);
1489
1489
+
updateAppStatus({ message: `Uploading video to Bluesky...` });
1490
1490
+
videoBlob = await uploadVideoToBluesky(agent, buffer, filename);
1157
1491
}
1158
1492
videoAspectRatio = aspectRatio;
1159
1493
console.log(`[${twitterUsername}] ✅ Video upload process complete.`);
1160
1494
break; // Prioritize first video
1161
1495
}
1162
1162
-
1163
1163
-
console.warn(`[${twitterUsername}] ⚠️ Video too large (${(buffer.length / 1024 / 1024).toFixed(2)}MB). Fallback to link.`);
1496
1496
+
1497
1497
+
console.warn(
1498
1498
+
`[${twitterUsername}] ⚠️ Video too large (${(buffer.length / 1024 / 1024).toFixed(2)}MB). Fallback to link.`,
1499
1499
+
);
1164
1500
const tweetUrl = `https://twitter.com/${twitterUsername}/status/${tweetId}`;
1165
1501
if (!text.includes(tweetUrl)) text += `\n\nVideo: ${tweetUrl}`;
1166
1502
} catch (err) {
1167
1503
const errMsg = (err as Error).message;
1168
1168
-
if (errMsg !== "VIDEO_FALLBACK_503") {
1169
1169
-
console.error(`[${twitterUsername}] ❌ Failed video upload flow:`, errMsg);
1504
1504
+
if (errMsg !== 'VIDEO_FALLBACK_503') {
1505
1505
+
console.error(`[${twitterUsername}] ❌ Failed video upload flow:`, errMsg);
1170
1506
}
1171
1507
const tweetUrl = `https://twitter.com/${twitterUsername}/status/${tweetId}`;
1172
1508
if (!text.includes(tweetUrl)) text += `\n\nVideo: ${tweetUrl}`;
···
1178
1514
1179
1515
// Cleanup text
1180
1516
for (const link of mediaLinksToRemove) text = text.split(link).join('').trim();
1517
1517
+
if (isSponsoredCard) {
1518
1518
+
const cardLinks = detectCarouselLinks(tweet);
1519
1519
+
const cardPrimaryLink = detectCardMedia(tweet).link;
1520
1520
+
const requestedLinks = [cardPrimaryLink, ...cardLinks].filter(Boolean) as string[];
1521
1521
+
requestedLinks.forEach((link) => {
1522
1522
+
if (!urls.some((u) => u.expanded_url === link || u.url === link)) {
1523
1523
+
urls.push({ url: link, expanded_url: link });
1524
1524
+
}
1525
1525
+
});
1526
1526
+
}
1181
1527
text = text.replace(/\n\s*\n/g, '\n\n').trim();
1528
1528
+
text = addTextFallbacks(text);
1182
1529
1183
1530
// 3. Quoting Logic
1184
1531
let quoteEmbed: { $type: string; record: { uri: string; cid: string } } | null = null;
···
1194
1541
} else {
1195
1542
const quoteUrlEntity = urls.find((u) => u.expanded_url?.includes(quoteId));
1196
1543
const qUrl = quoteUrlEntity?.expanded_url || `https://twitter.com/i/status/${quoteId}`;
1197
1197
-
1544
1544
+
1198
1545
// Check if it's a self-quote (same user)
1199
1199
-
const isSelfQuote = qUrl.toLowerCase().includes(`twitter.com/${twitterUsername.toLowerCase()}/`) ||
1200
1200
-
qUrl.toLowerCase().includes(`x.com/${twitterUsername.toLowerCase()}/`);
1201
1201
-
1546
1546
+
const isSelfQuote =
1547
1547
+
qUrl.toLowerCase().includes(`twitter.com/${twitterUsername.toLowerCase()}/`) ||
1548
1548
+
qUrl.toLowerCase().includes(`x.com/${twitterUsername.toLowerCase()}/`);
1549
1549
+
1202
1550
if (!isSelfQuote) {
1203
1551
externalQuoteUrl = qUrl;
1204
1552
console.log(`[${twitterUsername}] 🔗 Quoted tweet is external: ${externalQuoteUrl}`);
1205
1205
-
1553
1553
+
1206
1554
// Try to capture screenshot for external QTs if we have space for images
1207
1555
if (images.length < 4 && !videoBlob) {
1208
1556
const ssResult = await captureTweetScreenshot(externalQuoteUrl);
···
1210
1558
try {
1211
1559
let blob: BlobRef;
1212
1560
if (dryRun) {
1213
1213
-
console.log(`[${twitterUsername}] 🧪 [DRY RUN] Would upload screenshot for quote (${(ssResult.buffer.length/1024).toFixed(2)} KB)`);
1214
1214
-
blob = { ref: { toString: () => 'mock-ss-blob' }, mimeType: 'image/png', size: ssResult.buffer.length } as any;
1561
1561
+
console.log(
1562
1562
+
`[${twitterUsername}] 🧪 [DRY RUN] Would upload screenshot for quote (${(ssResult.buffer.length / 1024).toFixed(2)} KB)`,
1563
1563
+
);
1564
1564
+
blob = {
1565
1565
+
ref: { toString: () => 'mock-ss-blob' },
1566
1566
+
mimeType: 'image/png',
1567
1567
+
size: ssResult.buffer.length,
1568
1568
+
} as any;
1215
1569
} else {
1216
1216
-
blob = await uploadToBluesky(agent, ssResult.buffer, 'image/png');
1570
1570
+
blob = await uploadToBluesky(agent, ssResult.buffer, 'image/png');
1217
1571
}
1218
1218
-
images.push({
1219
1219
-
alt: `Quote Tweet: ${externalQuoteUrl}`,
1220
1220
-
image: blob,
1221
1221
-
aspectRatio: { width: ssResult.width, height: ssResult.height }
1572
1572
+
images.push({
1573
1573
+
alt: `Quote Tweet: ${externalQuoteUrl}`,
1574
1574
+
image: blob,
1575
1575
+
aspectRatio: { width: ssResult.width, height: ssResult.height },
1222
1576
});
1223
1577
} catch (e) {
1224
1578
console.warn(`[${twitterUsername}] ⚠️ Failed to upload screenshot blob.`);
···
1229
1583
console.log(`[${twitterUsername}] 🔁 Quoted tweet is a self-quote, skipping link.`);
1230
1584
}
1231
1585
}
1232
1232
-
} else if (images.length === 0 && !videoBlob) {
1233
1233
-
// If no media and no quote, check for external links to embed
1234
1234
-
// We prioritize the LAST link found as it's often the main content
1235
1235
-
const potentialLinks = urls
1236
1236
-
.map(u => u.expanded_url)
1237
1237
-
.filter(u => u && !u.includes('twitter.com') && !u.includes('x.com')) as string[];
1238
1238
-
1239
1239
-
if (potentialLinks.length > 0) {
1240
1240
-
const linkToEmbed = potentialLinks[potentialLinks.length - 1];
1241
1241
-
if (linkToEmbed) {
1242
1242
-
// Optimization: If text is too long, but removing the link makes it fit, do it!
1243
1243
-
// The link will be present in the embed card anyway.
1244
1244
-
if (text.length > 300 && text.includes(linkToEmbed)) {
1245
1245
-
const lengthWithoutLink = text.length - linkToEmbed.length;
1246
1246
-
// Allow some buffer (e.g. whitespace cleanup might save 1-2 chars)
1247
1247
-
if (lengthWithoutLink <= 300) {
1248
1248
-
console.log(`[${twitterUsername}] 📏 Optimizing: Removing link ${linkToEmbed} from text to avoid threading (Card will embed it).`);
1249
1249
-
text = text.replace(linkToEmbed, '').trim();
1250
1250
-
// Clean up potential double punctuation/spaces left behind
1251
1251
-
text = text.replace(/\s\.$/, '.').replace(/\s\s+/g, ' ');
1252
1252
-
}
1253
1253
-
}
1586
1586
+
} else if ((images.length === 0 && !videoBlob) || isSponsoredCard) {
1587
1587
+
// If no media and no quote, check for external links to embed
1588
1588
+
// We prioritize the LAST link found as it's often the main content
1589
1589
+
const potentialLinks = urls
1590
1590
+
.map((u) => u.expanded_url)
1591
1591
+
.filter((u) => u && !u.includes('twitter.com') && !u.includes('x.com')) as string[];
1254
1592
1255
1255
-
console.log(`[${twitterUsername}] 🃏 Fetching link card for: ${linkToEmbed}`);
1256
1256
-
linkCard = await fetchEmbedUrlCard(agent, linkToEmbed);
1593
1593
+
if (potentialLinks.length > 0) {
1594
1594
+
const linkToEmbed = potentialLinks[potentialLinks.length - 1];
1595
1595
+
if (linkToEmbed) {
1596
1596
+
// Optimization: If text is too long, but removing the link makes it fit, do it!
1597
1597
+
// The link will be present in the embed card anyway.
1598
1598
+
if (text.length > 300 && text.includes(linkToEmbed)) {
1599
1599
+
const lengthWithoutLink = text.length - linkToEmbed.length;
1600
1600
+
// Allow some buffer (e.g. whitespace cleanup might save 1-2 chars)
1601
1601
+
if (lengthWithoutLink <= 300) {
1602
1602
+
console.log(
1603
1603
+
`[${twitterUsername}] 📏 Optimizing: Removing link ${linkToEmbed} from text to avoid threading (Card will embed it).`,
1604
1604
+
);
1605
1605
+
text = text.replace(linkToEmbed, '').trim();
1606
1606
+
// Clean up potential double punctuation/spaces left behind
1607
1607
+
text = text.replace(/\s\.$/, '.').replace(/\s\s+/g, ' ');
1257
1608
}
1609
1609
+
}
1610
1610
+
1611
1611
+
console.log(`[${twitterUsername}] 🃏 Fetching link card for: ${linkToEmbed}`);
1612
1612
+
linkCard = await fetchEmbedUrlCard(agent, linkToEmbed);
1258
1613
}
1614
1614
+
}
1259
1615
}
1260
1616
1261
1617
// Only append link for external quotes IF we couldn't natively embed it OR screenshot it
1262
1262
-
const hasScreenshot = images.some(img => img.alt.startsWith('Quote Tweet:'));
1618
1618
+
const hasScreenshot = images.some((img) => img.alt.startsWith('Quote Tweet:'));
1263
1619
if (externalQuoteUrl && !quoteEmbed && !hasScreenshot && !text.includes(externalQuoteUrl)) {
1264
1620
text += `\n\nQT: ${externalQuoteUrl}`;
1265
1621
}
1266
1622
1623
1623
+
if (isSponsoredCard) {
1624
1624
+
const hasCardImages = mediaEntities.some((media) => media.source === 'card');
1625
1625
+
if (hasCardImages) {
1626
1626
+
text = ensureSponsoredLinks(text, tweet);
1627
1627
+
}
1628
1628
+
}
1629
1629
+
1267
1630
// 4. Threading and Posting
1268
1631
const chunks = splitText(text);
1269
1632
console.log(`[${twitterUsername}] 📝 Splitting text into ${chunks.length} chunks.`);
1270
1270
-
1633
1633
+
1271
1634
let lastPostInfo: ProcessedTweetEntry | null = replyParentInfo;
1272
1635
1273
1636
// We will save the first chunk as the "Root" of this tweet, and the last chunk as the "Tail".
···
1276
1639
1277
1640
for (let i = 0; i < chunks.length; i++) {
1278
1641
let chunk = chunks[i] as string;
1279
1279
-
1642
1642
+
1280
1643
// Add (i/n) if split
1281
1644
if (chunks.length > 1) {
1282
1282
-
chunk += ` (${i + 1}/${chunks.length})`;
1645
1645
+
chunk += ` (${i + 1}/${chunks.length})`;
1283
1646
}
1284
1647
1285
1648
console.log(`[${twitterUsername}] 📤 Posting chunk ${i + 1}/${chunks.length}...`);
1286
1649
updateAppStatus({ message: `Posting chunk ${i + 1}/${chunks.length}...` });
1287
1287
-
1650
1650
+
1288
1651
const rt = new RichText({ text: chunk });
1289
1652
await rt.detectFacets(agent);
1290
1653
const detectedLangs = detectLanguage(chunk);
···
1331
1694
let rootRef: { uri: string; cid: string } | null = null;
1332
1695
1333
1696
if (lastPostInfo?.uri && lastPostInfo?.cid) {
1334
1334
-
// If this is the start of a new tweet (i=0), check if parent has a tail
1335
1335
-
if (i === 0 && lastPostInfo.tail) {
1336
1336
-
parentRef = lastPostInfo.tail;
1337
1337
-
} else {
1338
1338
-
// Otherwise (intra-tweet or parent has no tail), use the main uri/cid (which is the previous post/chunk)
1339
1339
-
parentRef = { uri: lastPostInfo.uri, cid: lastPostInfo.cid };
1340
1340
-
}
1341
1341
-
1342
1342
-
rootRef = lastPostInfo.root || { uri: lastPostInfo.uri, cid: lastPostInfo.cid };
1697
1697
+
// If this is the start of a new tweet (i=0), check if parent has a tail
1698
1698
+
if (i === 0 && lastPostInfo.tail) {
1699
1699
+
parentRef = lastPostInfo.tail;
1700
1700
+
} else {
1701
1701
+
// Otherwise (intra-tweet or parent has no tail), use the main uri/cid (which is the previous post/chunk)
1702
1702
+
parentRef = { uri: lastPostInfo.uri, cid: lastPostInfo.cid };
1703
1703
+
}
1704
1704
+
1705
1705
+
rootRef = lastPostInfo.root || { uri: lastPostInfo.uri, cid: lastPostInfo.cid };
1343
1706
}
1344
1707
1345
1708
if (parentRef && rootRef) {
···
1353
1716
// Retry logic for network/socket errors
1354
1717
let response: any;
1355
1718
let retries = 3;
1356
1356
-
1719
1719
+
1357
1720
if (dryRun) {
1358
1358
-
console.log(`[${twitterUsername}] 🧪 [DRY RUN] Would post chunk ${i + 1}/${chunks.length}`);
1359
1359
-
if (postRecord.embed) console.log(` - With embed: ${postRecord.embed.$type}`);
1360
1360
-
if (postRecord.reply) console.log(` - As reply to: ${postRecord.reply.parent.uri}`);
1361
1361
-
response = { uri: 'at://did:plc:mock/app.bsky.feed.post/mock', cid: 'mock-cid' };
1721
1721
+
console.log(`[${twitterUsername}] 🧪 [DRY RUN] Would post chunk ${i + 1}/${chunks.length}`);
1722
1722
+
if (postRecord.embed) console.log(` - With embed: ${postRecord.embed.$type}`);
1723
1723
+
if (postRecord.reply) console.log(` - As reply to: ${postRecord.reply.parent.uri}`);
1724
1724
+
response = { uri: 'at://did:plc:mock/app.bsky.feed.post/mock', cid: 'mock-cid' };
1362
1725
} else {
1363
1363
-
while (retries > 0) {
1364
1364
-
try {
1365
1365
-
response = await agent.post(postRecord);
1366
1366
-
break;
1367
1367
-
} catch (err: any) {
1368
1368
-
retries--;
1369
1369
-
if (retries === 0) throw err;
1370
1370
-
console.warn(`[${twitterUsername}] ⚠️ Post failed (Socket/Network), retrying in 5s... (${retries} retries left)`);
1371
1371
-
await new Promise(r => setTimeout(r, 5000));
1372
1372
-
}
1726
1726
+
while (retries > 0) {
1727
1727
+
try {
1728
1728
+
response = await agent.post(postRecord);
1729
1729
+
break;
1730
1730
+
} catch (err: any) {
1731
1731
+
retries--;
1732
1732
+
if (retries === 0) throw err;
1733
1733
+
console.warn(
1734
1734
+
`[${twitterUsername}] ⚠️ Post failed (Socket/Network), retrying in 5s... (${retries} retries left)`,
1735
1735
+
);
1736
1736
+
await new Promise((r) => setTimeout(r, 5000));
1373
1737
}
1738
1738
+
}
1374
1739
}
1375
1375
-
1740
1740
+
1376
1741
const currentPostInfo = {
1377
1377
-
uri: response.uri,
1378
1378
-
cid: response.cid,
1379
1379
-
root: postRecord.reply ? postRecord.reply.root : { uri: response.uri, cid: response.cid },
1380
1380
-
// Text is just the current chunk text
1381
1381
-
text: chunk
1742
1742
+
uri: response.uri,
1743
1743
+
cid: response.cid,
1744
1744
+
root: postRecord.reply ? postRecord.reply.root : { uri: response.uri, cid: response.cid },
1745
1745
+
// Text is just the current chunk text
1746
1746
+
text: chunk,
1382
1747
};
1383
1383
-
1748
1748
+
1384
1749
if (i === 0) firstChunkInfo = currentPostInfo;
1385
1750
lastChunkInfo = currentPostInfo;
1386
1751
lastPostInfo = currentPostInfo; // Update for next iteration
1387
1752
1388
1753
console.log(`[${twitterUsername}] ✅ Chunk ${i + 1} posted successfully.`);
1389
1389
-
1754
1754
+
1390
1755
if (chunks.length > 1) {
1391
1756
await new Promise((r) => setTimeout(r, 3000));
1392
1757
}
···
1395
1760
break;
1396
1761
}
1397
1762
}
1398
1398
-
1763
1763
+
1399
1764
// Save to DB and Map
1400
1765
if (firstChunkInfo && lastChunkInfo) {
1401
1401
-
const entry: ProcessedTweetEntry = {
1402
1402
-
uri: firstChunkInfo.uri,
1403
1403
-
cid: firstChunkInfo.cid,
1404
1404
-
root: firstChunkInfo.root,
1405
1405
-
tail: { uri: lastChunkInfo.uri, cid: lastChunkInfo.cid }, // Save tail!
1406
1406
-
text: tweetText
1407
1407
-
};
1408
1408
-
1409
1409
-
if (!dryRun) {
1410
1410
-
saveProcessedTweet(twitterUsername, bskyIdentifier, tweetId, entry);
1411
1411
-
localProcessedMap[tweetId] = entry; // Update local map for subsequent replies in this batch
1412
1412
-
}
1766
1766
+
const entry: ProcessedTweetEntry = {
1767
1767
+
uri: firstChunkInfo.uri,
1768
1768
+
cid: firstChunkInfo.cid,
1769
1769
+
root: firstChunkInfo.root,
1770
1770
+
tail: { uri: lastChunkInfo.uri, cid: lastChunkInfo.cid }, // Save tail!
1771
1771
+
text: tweetText,
1772
1772
+
};
1773
1773
+
1774
1774
+
if (!dryRun) {
1775
1775
+
saveProcessedTweet(twitterUsername, bskyIdentifier, tweetId, entry);
1776
1776
+
localProcessedMap[tweetId] = entry; // Update local map for subsequent replies in this batch
1777
1777
+
}
1413
1778
}
1414
1414
-
1779
1779
+
1415
1780
// Add a random delay between 5s and 15s to be more human-like
1416
1781
const wait = Math.floor(Math.random() * 10000) + 5000;
1417
1782
console.log(`[${twitterUsername}] 😴 Pacing: Waiting ${wait / 1000}s before next tweet.`);
···
1431
1796
requestId?: string,
1432
1797
): Promise<void> {
1433
1798
const config = getConfig();
1434
1434
-
const mapping = config.mappings.find((m) => m.twitterUsernames.map(u => u.toLowerCase()).includes(twitterUsername.toLowerCase()));
1799
1799
+
const mapping = config.mappings.find((m) =>
1800
1800
+
m.twitterUsernames.map((u) => u.toLowerCase()).includes(twitterUsername.toLowerCase()),
1801
1801
+
);
1435
1802
if (!mapping) {
1436
1803
console.error(`No mapping found for twitter username: ${twitterUsername}`);
1437
1804
return;
···
1439
1806
1440
1807
let agent = await getAgent(mapping);
1441
1808
if (!agent) {
1442
1442
-
if (dryRun) {
1443
1443
-
console.log("⚠️ Could not login to Bluesky, but proceeding with MOCK AGENT for Dry Run.");
1444
1444
-
// biome-ignore lint/suspicious/noExplicitAny: mock agent
1445
1445
-
agent = {
1446
1446
-
post: async (record: any) => ({ uri: 'at://did:plc:mock/app.bsky.feed.post/mock', cid: 'mock-cid' }),
1447
1447
-
uploadBlob: async (data: any) => ({ data: { blob: { ref: { toString: () => 'mock-blob' } } } }),
1448
1448
-
// Add other necessary methods if they are called outside of the already mocked dryRun blocks
1449
1449
-
// But since we mocked the calls inside processTweets for dryRun, we just need the object to exist.
1450
1450
-
session: { did: 'did:plc:mock' },
1451
1451
-
com: { atproto: { repo: { describeRepo: async () => ({ data: {} }) } } }
1452
1452
-
} as any;
1453
1453
-
} else {
1454
1454
-
return;
1455
1455
-
}
1809
1809
+
if (dryRun) {
1810
1810
+
console.log('⚠️ Could not login to Bluesky, but proceeding with MOCK AGENT for Dry Run.');
1811
1811
+
// biome-ignore lint/suspicious/noExplicitAny: mock agent
1812
1812
+
agent = {
1813
1813
+
post: async (record: any) => ({ uri: 'at://did:plc:mock/app.bsky.feed.post/mock', cid: 'mock-cid' }),
1814
1814
+
uploadBlob: async (data: any) => ({ data: { blob: { ref: { toString: () => 'mock-blob' } } } }),
1815
1815
+
// Add other necessary methods if they are called outside of the already mocked dryRun blocks
1816
1816
+
// But since we mocked the calls inside processTweets for dryRun, we just need the object to exist.
1817
1817
+
session: { did: 'did:plc:mock' },
1818
1818
+
com: { atproto: { repo: { describeRepo: async () => ({ data: {} }) } } },
1819
1819
+
} as any;
1820
1820
+
} else {
1821
1821
+
return;
1822
1822
+
}
1456
1823
}
1457
1824
1458
1825
console.log(`Starting full history import for ${twitterUsername} -> ${mapping.bskyIdentifier}...`);
···
1463
1830
1464
1831
console.log(`Fetching tweets for ${twitterUsername}...`);
1465
1832
updateAppStatus({ message: `Fetching tweets...` });
1466
1466
-
1833
1833
+
1467
1834
const client = await getTwitterScraper();
1468
1835
if (client) {
1469
1469
-
try {
1470
1470
-
// Use getTweets which reliably fetches user timeline
1471
1471
-
// limit defaults to 15 in function signature, but for history import we might want more.
1472
1472
-
// However, the generator will fetch as much as we ask.
1473
1473
-
const fetchLimit = limit || 100;
1474
1474
-
const generator = client.getTweets(twitterUsername, fetchLimit);
1475
1475
-
1476
1476
-
for await (const scraperTweet of generator) {
1477
1477
-
if (!ignoreCancellation) {
1478
1478
-
const stillPending = getPendingBackfills().some(b => b.id === mapping.id && (!requestId || b.requestId === requestId));
1479
1479
-
if (!stillPending) {
1480
1480
-
console.log(`[${twitterUsername}] 🛑 Backfill cancelled.`);
1481
1481
-
break;
1482
1482
-
}
1836
1836
+
try {
1837
1837
+
// Use getTweets which reliably fetches user timeline
1838
1838
+
// limit defaults to 15 in function signature, but for history import we might want more.
1839
1839
+
// However, the generator will fetch as much as we ask.
1840
1840
+
const fetchLimit = limit || 100;
1841
1841
+
const generator = client.getTweets(twitterUsername, fetchLimit);
1483
1842
1484
1484
-
}
1485
1485
-
1486
1486
-
const t = mapScraperTweetToLocalTweet(scraperTweet);
1487
1487
-
const tid = t.id_str || t.id;
1488
1488
-
if (!tid) continue;
1489
1489
-
1490
1490
-
if (!processedTweets[tid] && !seenIds.has(tid)) {
1491
1491
-
allFoundTweets.push(t);
1492
1492
-
seenIds.add(tid);
1493
1493
-
}
1494
1494
-
1495
1495
-
if (allFoundTweets.length >= fetchLimit) break;
1843
1843
+
for await (const scraperTweet of generator) {
1844
1844
+
if (!ignoreCancellation) {
1845
1845
+
const stillPending = getPendingBackfills().some(
1846
1846
+
(b) => b.id === mapping.id && (!requestId || b.requestId === requestId),
1847
1847
+
);
1848
1848
+
if (!stillPending) {
1849
1849
+
console.log(`[${twitterUsername}] 🛑 Backfill cancelled.`);
1850
1850
+
break;
1496
1851
}
1497
1497
-
} catch(e) {
1498
1498
-
console.warn("Error during history fetch:", e);
1852
1852
+
}
1853
1853
+
1854
1854
+
const t = mapScraperTweetToLocalTweet(scraperTweet);
1855
1855
+
const tid = t.id_str || t.id;
1856
1856
+
if (!tid) continue;
1857
1857
+
1858
1858
+
if (!processedTweets[tid] && !seenIds.has(tid)) {
1859
1859
+
allFoundTweets.push(t);
1860
1860
+
seenIds.add(tid);
1861
1861
+
}
1862
1862
+
1863
1863
+
if (allFoundTweets.length >= fetchLimit) break;
1499
1864
}
1865
1865
+
} catch (e) {
1866
1866
+
console.warn('Error during history fetch:', e);
1867
1867
+
}
1500
1868
}
1501
1869
1502
1870
console.log(`Fetch complete. Found ${allFoundTweets.length} new tweets to import.`);
···
1510
1878
const activeTasks = new Map<string, Promise<void>>();
1511
1879
1512
1880
async function runAccountTask(mapping: AccountMapping, backfillRequest?: PendingBackfill, dryRun = false) {
1513
1513
-
if (activeTasks.has(mapping.id)) return; // Already running
1881
1881
+
if (activeTasks.has(mapping.id)) return; // Already running
1514
1882
1515
1515
-
const task = (async () => {
1516
1516
-
try {
1517
1517
-
const agent = await getAgent(mapping);
1518
1518
-
if (!agent) return;
1883
1883
+
const task = (async () => {
1884
1884
+
try {
1885
1885
+
const agent = await getAgent(mapping);
1886
1886
+
if (!agent) return;
1519
1887
1520
1520
-
const backfillReq = backfillRequest ?? getPendingBackfills().find(b => b.id === mapping.id);
1521
1521
-
1522
1522
-
if (backfillReq) {
1523
1523
-
const limit = backfillReq.limit || 15;
1524
1524
-
console.log(`[${mapping.bskyIdentifier}] Running backfill for ${mapping.twitterUsernames.length} accounts (limit ${limit})...`);
1525
1525
-
updateAppStatus({
1526
1526
-
state: 'backfilling',
1527
1527
-
currentAccount: mapping.twitterUsernames[0],
1528
1528
-
message: `Starting backfill (limit ${limit})...`,
1529
1529
-
backfillMappingId: mapping.id,
1530
1530
-
backfillRequestId: backfillReq.requestId,
1531
1531
-
});
1532
1532
-
1533
1533
-
for (const twitterUsername of mapping.twitterUsernames) {
1534
1534
-
const stillPending = getPendingBackfills().some(
1535
1535
-
(b) => b.id === mapping.id && b.requestId === backfillReq.requestId,
1536
1536
-
);
1537
1537
-
if (!stillPending) {
1538
1538
-
console.log(`[${mapping.bskyIdentifier}] 🛑 Backfill request replaced; stopping.`);
1539
1539
-
break;
1540
1540
-
}
1888
1888
+
const backfillReq = backfillRequest ?? getPendingBackfills().find((b) => b.id === mapping.id);
1889
1889
+
1890
1890
+
if (backfillReq) {
1891
1891
+
const limit = backfillReq.limit || 15;
1892
1892
+
console.log(
1893
1893
+
`[${mapping.bskyIdentifier}] Running backfill for ${mapping.twitterUsernames.length} accounts (limit ${limit})...`,
1894
1894
+
);
1895
1895
+
updateAppStatus({
1896
1896
+
state: 'backfilling',
1897
1897
+
currentAccount: mapping.twitterUsernames[0],
1898
1898
+
message: `Starting backfill (limit ${limit})...`,
1899
1899
+
backfillMappingId: mapping.id,
1900
1900
+
backfillRequestId: backfillReq.requestId,
1901
1901
+
});
1902
1902
+
1903
1903
+
for (const twitterUsername of mapping.twitterUsernames) {
1904
1904
+
const stillPending = getPendingBackfills().some(
1905
1905
+
(b) => b.id === mapping.id && b.requestId === backfillReq.requestId,
1906
1906
+
);
1907
1907
+
if (!stillPending) {
1908
1908
+
console.log(`[${mapping.bskyIdentifier}] 🛑 Backfill request replaced; stopping.`);
1909
1909
+
break;
1910
1910
+
}
1911
1911
+
1912
1912
+
try {
1913
1913
+
updateAppStatus({
1914
1914
+
state: 'backfilling',
1915
1915
+
currentAccount: twitterUsername,
1916
1916
+
message: `Starting backfill (limit ${limit})...`,
1917
1917
+
backfillMappingId: mapping.id,
1918
1918
+
backfillRequestId: backfillReq.requestId,
1919
1919
+
});
1920
1920
+
await importHistory(twitterUsername, mapping.bskyIdentifier, limit, dryRun, false, backfillReq.requestId);
1921
1921
+
} catch (err) {
1922
1922
+
console.error(`❌ Error backfilling ${twitterUsername}:`, err);
1923
1923
+
}
1924
1924
+
}
1925
1925
+
clearBackfill(mapping.id, backfillReq.requestId);
1926
1926
+
updateAppStatus({
1927
1927
+
state: 'idle',
1928
1928
+
message: `Backfill complete for ${mapping.bskyIdentifier}`,
1929
1929
+
backfillMappingId: undefined,
1930
1930
+
backfillRequestId: undefined,
1931
1931
+
});
1932
1932
+
console.log(`[${mapping.bskyIdentifier}] Backfill complete.`);
1933
1933
+
} else {
1934
1934
+
updateAppStatus({ backfillMappingId: undefined, backfillRequestId: undefined });
1541
1935
1542
1542
-
try {
1543
1543
-
updateAppStatus({
1544
1544
-
state: 'backfilling',
1545
1545
-
currentAccount: twitterUsername,
1546
1546
-
message: `Starting backfill (limit ${limit})...`,
1547
1547
-
backfillMappingId: mapping.id,
1548
1548
-
backfillRequestId: backfillReq.requestId,
1549
1549
-
});
1550
1550
-
await importHistory(twitterUsername, mapping.bskyIdentifier, limit, dryRun, false, backfillReq.requestId);
1551
1551
-
} catch (err) {
1552
1552
-
console.error(`❌ Error backfilling ${twitterUsername}:`, err);
1553
1553
-
}
1554
1554
-
}
1555
1555
-
clearBackfill(mapping.id, backfillReq.requestId);
1556
1556
-
updateAppStatus({
1557
1557
-
state: 'idle',
1558
1558
-
message: `Backfill complete for ${mapping.bskyIdentifier}`,
1559
1559
-
backfillMappingId: undefined,
1560
1560
-
backfillRequestId: undefined,
1561
1561
-
});
1562
1562
-
console.log(`[${mapping.bskyIdentifier}] Backfill complete.`);
1563
1563
-
} else {
1564
1564
-
updateAppStatus({ backfillMappingId: undefined, backfillRequestId: undefined });
1936
1936
+
// Pre-load processed IDs for optimization
1937
1937
+
const processedMap = loadProcessedTweets(mapping.bskyIdentifier);
1938
1938
+
const processedIds = new Set(Object.keys(processedMap));
1939
1939
+
1940
1940
+
for (const twitterUsername of mapping.twitterUsernames) {
1941
1941
+
try {
1942
1942
+
console.log(`[${twitterUsername}] 🏁 Starting check for new tweets...`);
1943
1943
+
updateAppStatus({
1944
1944
+
state: 'checking',
1945
1945
+
currentAccount: twitterUsername,
1946
1946
+
message: 'Fetching latest tweets...',
1947
1947
+
backfillMappingId: undefined,
1948
1948
+
backfillRequestId: undefined,
1949
1949
+
});
1565
1950
1566
1566
-
// Pre-load processed IDs for optimization
1567
1567
-
const processedMap = loadProcessedTweets(mapping.bskyIdentifier);
1568
1568
-
const processedIds = new Set(Object.keys(processedMap));
1951
1951
+
// Use fetchUserTweets with early stopping optimization
1952
1952
+
// Increase limit slightly since we have early stopping now
1953
1953
+
const tweets = await fetchUserTweets(twitterUsername, 50, processedIds);
1569
1954
1570
1570
-
for (const twitterUsername of mapping.twitterUsernames) {
1571
1571
-
try {
1572
1572
-
console.log(`[${twitterUsername}] 🏁 Starting check for new tweets...`);
1573
1573
-
updateAppStatus({
1574
1574
-
state: 'checking',
1575
1575
-
currentAccount: twitterUsername,
1576
1576
-
message: 'Fetching latest tweets...',
1577
1577
-
backfillMappingId: undefined,
1578
1578
-
backfillRequestId: undefined,
1579
1579
-
});
1580
1580
-
1581
1581
-
// Use fetchUserTweets with early stopping optimization
1582
1582
-
// Increase limit slightly since we have early stopping now
1583
1583
-
const tweets = await fetchUserTweets(twitterUsername, 50, processedIds);
1584
1584
-
1585
1585
-
if (!tweets || tweets.length === 0) {
1586
1586
-
console.log(`[${twitterUsername}] ℹ️ No tweets found (or fetch failed).`);
1587
1587
-
continue;
1588
1588
-
}
1589
1589
-
1590
1590
-
console.log(`[${twitterUsername}] 📥 Fetched ${tweets.length} tweets.`);
1591
1591
-
await processTweets(agent, twitterUsername, mapping.bskyIdentifier, tweets, dryRun);
1592
1592
-
} catch (err) {
1593
1593
-
console.error(`❌ Error checking ${twitterUsername}:`, err);
1594
1594
-
}
1595
1595
-
}
1955
1955
+
if (!tweets || tweets.length === 0) {
1956
1956
+
console.log(`[${twitterUsername}] ℹ️ No tweets found (or fetch failed).`);
1957
1957
+
continue;
1596
1958
}
1597
1597
-
} catch (err) {
1598
1598
-
console.error(`Error processing mapping ${mapping.bskyIdentifier}:`, err);
1599
1599
-
} finally {
1600
1600
-
activeTasks.delete(mapping.id);
1959
1959
+
1960
1960
+
console.log(`[${twitterUsername}] 📥 Fetched ${tweets.length} tweets.`);
1961
1961
+
await processTweets(agent, twitterUsername, mapping.bskyIdentifier, tweets, dryRun);
1962
1962
+
} catch (err) {
1963
1963
+
console.error(`❌ Error checking ${twitterUsername}:`, err);
1964
1964
+
}
1601
1965
}
1602
1602
-
})();
1966
1966
+
}
1967
1967
+
} catch (err) {
1968
1968
+
console.error(`Error processing mapping ${mapping.bskyIdentifier}:`, err);
1969
1969
+
} finally {
1970
1970
+
activeTasks.delete(mapping.id);
1971
1971
+
}
1972
1972
+
})();
1603
1973
1604
1604
-
activeTasks.set(mapping.id, task);
1605
1605
-
return task; // Return task promise for await in main loop
1974
1974
+
activeTasks.set(mapping.id, task);
1975
1975
+
return task; // Return task promise for await in main loop
1606
1976
}
1607
1977
1978
1978
+
import type { AccountMapping } from './config-manager.js';
1608
1979
import {
1609
1609
-
startServer,
1610
1610
-
updateLastCheckTime,
1611
1611
-
getPendingBackfills,
1612
1980
clearBackfill,
1613
1981
getNextCheckTime,
1982
1982
+
getPendingBackfills,
1983
1983
+
startServer,
1614
1984
updateAppStatus,
1985
1985
+
updateLastCheckTime,
1615
1986
} from './server.js';
1616
1987
import type { PendingBackfill } from './server.js';
1617
1617
-
import { AccountMapping } from './config-manager.js';
1618
1988
1619
1989
async function main(): Promise<void> {
1620
1990
const program = new Command();
···
1655
2025
console.error('Twitter credentials not set. Cannot import history.');
1656
2026
process.exit(1);
1657
2027
}
1658
1658
-
const mapping = config.mappings.find(m => m.twitterUsernames.map(u => u.toLowerCase()).includes(options.username.toLowerCase()));
2028
2028
+
const mapping = config.mappings.find((m) =>
2029
2029
+
m.twitterUsernames.map((u) => u.toLowerCase()).includes(options.username.toLowerCase()),
2030
2030
+
);
1659
2031
if (!mapping) {
1660
2032
console.error(`No mapping found for ${options.username}`);
1661
2033
process.exit(1);
···
1675
2047
// Concurrency limit for processing accounts
1676
2048
const runLimit = pLimit(3);
1677
2049
1678
1678
-
const findMappingById = (mappings: AccountMapping[], id: string) =>
1679
1679
-
mappings.find((mapping) => mapping.id === id);
2050
2050
+
const findMappingById = (mappings: AccountMapping[], id: string) => mappings.find((mapping) => mapping.id === id);
1680
2051
1681
2052
// Main loop
1682
2053
while (true) {
···
1719
2090
for (const mapping of config.mappings) {
1720
2091
if (!mapping.enabled) continue;
1721
2092
1722
1722
-
tasks.push(runLimit(async () => {
1723
1723
-
await runAccountTask(mapping, undefined, options.dryRun);
1724
1724
-
}));
2093
2093
+
tasks.push(
2094
2094
+
runLimit(async () => {
2095
2095
+
await runAccountTask(mapping, undefined, options.dryRun);
2096
2096
+
}),
2097
2097
+
);
1725
2098
}
1726
2099
1727
2100
if (tasks.length > 0) {