scraping atproto for the most followed accounts

feat: batch fetches

+28 -30
+28 -30
src/index.js
··· 25 25 26 26 const data = await response.json(); 27 27 28 - accounts.push(...data.repos); 28 + // only get at did's from the accounts, and propigate the pds 29 + accounts.push(...data.repos.map(acc => ({ did: acc.did, pds }))); 29 30 30 31 if (data.cursor) { 31 32 return await getAccountsOnPds(pds, data.cursor, accounts); ··· 34 35 return accounts; 35 36 } 36 37 37 - async function getProfile(actor) { 38 - const response = await client.get('app.bsky.actor.getProfile', { 39 - params: { actor }, 38 + async function getProfiles(actorsWithPds) { 39 + const dids = actorsWithPds.map(acc => acc.did); 40 + const didToPds = new Map(actorsWithPds.map(acc => [acc.did, acc.pds])); 41 + 42 + const response = await client.get('app.bsky.actor.getProfiles', { 43 + params: { actors: dids }, 40 44 }); 41 45 42 - if (!response.ok) return; 46 + if (!response.ok) return []; 43 47 44 - return response.data; 48 + return response.data.profiles.map(profile => ({ 49 + ...profile, 50 + pds: didToPds.get(profile.did), 51 + })); 45 52 } 46 53 47 54 // finally do the thing ··· 64 71 pdses.push(host); 65 72 } 66 73 67 - const accountsToWrite = []; 74 + const accounts = []; 68 75 for (const pds of pdses) { 69 76 try { 70 - let accountsOnPds = await getAccountsOnPds(pds); 77 + const accountsOnPds = await getAccountsOnPds(pds); 71 78 72 79 if (!accountsOnPds) { 73 80 console.log(`Failed to get accounts on PDS: ${pds}`); ··· 75 82 }; 76 83 77 84 console.log(`Found ${accountsOnPds.length} accounts on PDS: ${pds}`); 78 - 79 - for (const account of accountsOnPds) { 80 - if (!account) continue; 81 - 82 - const profile = await getProfile(account.did); 83 - 84 - // don't deal with the data if it has no followers / data is not available 85 - if (!profile?.followersCount) continue; 86 - 87 - if (profile) { 88 - accountsToWrite.push({ 89 - did: account.did, 90 - handle: profile.handle, 91 - followersCount: profile.followersCount, 92 - pds: pds, 93 - }); 94 - } 95 - } 85 + accounts.push(...accountsOnPds); 96 86 } catch (e) { 97 87 console.log(`fetch error ${e}`); 98 88 continue; 99 89 }; 100 90 } 101 91 92 + const accountsToWrite = []; 93 + for (let i = 0; i <= accounts.length; i = i + 25) { 94 + const accountsToFetch = accounts.slice(i, i + 25); 95 + const fetchedProfiles = await getProfiles(accountsToFetch); 96 + accountsToWrite.push(...fetchedProfiles); 97 + } 98 + 102 99 // sort the accounts by followers count 103 - accountsToWrite.sort((a, b) => b.followersCount - a.followersCount); 100 + accountsToWrite.sort((a, b) => (b.followersCount || 0) - (a.followersCount || 0)); 104 101 105 - fs.writeFileSync('dist/accounts.md', 'Rank | Handle | PDS | Followers'); 106 - fs.appendFileSync('dist/accounts.md', '\n----|------|-----|----------'); 102 + let output = 'Rank | Handle | PDS | Followers\n----|------|-----|----------'; 107 103 108 104 for (const [i, account] of accountsToWrite.entries()) { 109 - fs.appendFileSync('dist/accounts.md', `\n${i + 1} | ${account.handle} | ${account.pds} | ${account.followersCount}`); 105 + output += `\n${i + 1} | ${account.handle} | ${account.pds} | ${account.followersCount}`; 110 106 } 107 + 108 + fs.writeFileSync('dist/accounts.md', output); 111 109 } 112 110 113 111 main()