scraping atproto for the most followed accounts

feat: concurrency

+29 -25
+29 -25
src/index.js
··· 19 19 }); 20 20 21 21 if (!response.ok) { 22 - console.log(`failed to retrieve accounts for ${pds}: ${data.error}: ${data.message}`); 22 + console.log(`failed to retrieve accounts for ${pds}`); 23 23 return []; 24 - }; 24 + } 25 25 26 26 const data = await response.json(); 27 27 ··· 51 51 })); 52 52 } 53 53 54 - // finally do the thing 54 + async function fetchAllAccounts(pdses, concurrency = 5) { 55 + const results = []; 56 + const queue = [...pdses]; 57 + 58 + const workers = Array.from({ length: concurrency }, async () => { 59 + while (queue.length > 0) { 60 + const pds = queue.pop(); 61 + try { 62 + const accountsOnPds = await getAccountsOnPds(pds); 63 + results.push(...accountsOnPds); 64 + console.log(`Found ${accountsOnPds.length} accounts on ${pds}`); 65 + } catch (e) { 66 + console.log(`fetch error ${e}`); 67 + } 68 + } 69 + }); 55 70 71 + await Promise.all(workers); 72 + return results; 73 + } 74 + 75 + // finally do the thing 56 76 async function main() { 57 77 const data = fs.readFileSync('data.json', 'utf8'); 58 78 const json = JSON.parse(data); ··· 66 86 if (val.errorAt) continue; 67 87 68 88 // this is massive and full of 0 follower andies 69 - if (host === "https://atproto.brid.gy/" || host === "https://pds.si46.world/") continue; 89 + if (host === 'https://atproto.brid.gy/' || host === 'https://pds.si46.world/') continue; 70 90 71 91 pdses.push(host); 72 92 } 73 93 74 - const accounts = []; 75 - for (const pds of pdses) { 76 - try { 77 - const accountsOnPds = await getAccountsOnPds(pds); 78 - 79 - if (!accountsOnPds) { 80 - console.log(`Failed to get accounts on PDS: ${pds}`); 81 - continue; 82 - }; 83 - 84 - console.log(`Found ${accountsOnPds.length} accounts on PDS: ${pds}`); 85 - accounts.push(...accountsOnPds); 86 - } catch (e) { 87 - console.log(`fetch error ${e}`); 88 - continue; 89 - }; 90 - } 94 + const accounts = await fetchAllAccounts(pdses, 5); 91 95 92 96 const accountsToWrite = []; 93 - for (let i = 0; i <= accounts.length; i = i + 
25) { 94 - const accountsToFetch = accounts.slice(i, i + 25); 95 - const fetchedProfiles = await getProfiles(accountsToFetch); 97 + for (let i = 0; i < accounts.length; i += 25) { 98 + const batch = accounts.slice(i, i + 25); 99 + const fetchedProfiles = await getProfiles(batch); 96 100 accountsToWrite.push(...fetchedProfiles); 97 101 } 98 102 ··· 102 106 let output = 'Rank | Handle | PDS | Followers\n----|------|-----|----------'; 103 107 104 108 for (const [i, account] of accountsToWrite.entries()) { 105 - output += `\n${i + 1} | ${account.handle} | ${account.pds} | ${account.followersCount}`; 109 + output += `\n${i + 1} | ${account.handle} | ${account.pds} | ${account.followersCount || 0}`; 106 110 } 107 111 108 112 fs.writeFileSync('dist/accounts.md', output);