import {
  ENRICHMENT_BATCH_SIZE,
  getTrackedSoftware,
} from "../shared/constants.ts";
import type { LatestVersionMap, VersionSoftwareMap } from "../shared/types.ts";
import { runMigrations } from "../backend/database/migrations.ts";
import {
  getMetadata,
  getServersToEnrich,
  setMetadata,
  updateEnrichment,
  upsertServer,
} from "../backend/database/queries.ts";
import { fetchPdsList } from "../backend/services/pds-fetcher.ts";
import { fetchRecentVersions } from "../backend/services/version-checker.ts";
import { enrichBatch } from "../backend/services/pds-enricher.ts";

/**
 * Matches the first `major.minor.patch` triple embedded in a longer version
 * string (e.g. "millipds v0.0.5.dev17+..."). Non-global, so it is stateless
 * and safe to share between calls.
 */
const SEMVER_PATTERN = /(\d+\.\d+\.\d+)/;

/**
 * Parses the JSON array stored under `version_map:<softwareId>`.
 *
 * A malformed or non-array value is reported and treated as "no cached
 * versions" so that one corrupt metadata row cannot abort the whole refresh.
 *
 * @param raw - Raw metadata value as stored by `setMetadata`.
 * @param softwareId - Software id, used only for the warning message.
 * @returns The string entries of the cached array, or `[]` if unusable.
 */
function parseCachedVersionList(raw: string, softwareId: string): string[] {
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed.filter((v): v is string => typeof v === "string");
    }
  } catch {
    // Fall through to the warning below.
  }
  console.warn(
    `GitHub tags [${softwareId}]: cached version map is not a valid JSON array, ignoring`,
  );
  return [];
}

/**
 * Reads the previously persisted latest version and version list for one
 * tracked software from the metadata store.
 *
 * @param softwareId - Id used as the suffix of the metadata keys.
 * @returns The cached latest version (as returned by `getMetadata`) and the
 *   cached version list (empty when absent or unparsable).
 */
async function readCachedVersions(softwareId: string) {
  const latest = await getMetadata(`latest_version:${softwareId}`);
  const rawVersions = await getMetadata(`version_map:${softwareId}`);
  const versions = rawVersions
    ? parseCachedVersionList(rawVersions, softwareId)
    : [];
  return { latest, versions };
}

/**
 * Resolves which tracked software a reported version string belongs to.
 *
 * Tries an exact match first (e.g. "0.4.74"), then the first embedded
 * `x.y.z` triple. Lookups are restricted to own properties because the
 * version string is reported by remote servers: a value such as
 * "constructor" must not resolve through `Object.prototype`.
 *
 * @param version - Non-empty version string reported for a server.
 * @param versionToSoftware - Map from known version to software id.
 * @returns The matching software id, or `undefined` when nothing matches.
 */
function detectSoftware(
  version: string,
  versionToSoftware: VersionSoftwareMap,
) {
  const lookup = (key: string) =>
    Object.prototype.hasOwnProperty.call(versionToSoftware, key)
      ? versionToSoftware[key]
      : undefined;

  const exact = lookup(version);
  if (exact) return exact;

  const embedded = SEMVER_PATTERN.exec(version)?.[1];
  if (embedded && embedded !== version) {
    return lookup(embedded);
  }
  return undefined;
}

/**
 * Runs one OpenPDS refresh pass:
 *
 * 1. runs database migrations,
 * 2. fetches the PDS list (conditionally, via the stored ETag) and upserts it,
 * 3. refreshes the known version tags for every tracked software,
 * 4. enriches one batch of servers and stores the results,
 * 5. records the completion time under `last_full_refresh`.
 *
 * Any error is logged and swallowed, so the returned promise never rejects.
 */
export default async function (): Promise<void> {
  console.log("OpenPDS refresh started:", new Date().toISOString());

  try {
    await runMigrations();

    // 1. Fetch and sync state.json (with ETag caching)
    const previousStateEtag = await getMetadata("state_json_etag");
    const { pdsList, etag: stateEtag } = await fetchPdsList(previousStateEtag);

    if (pdsList) {
      console.log(
        `state.json: new data (${pdsList.length} PDSes), etag=${stateEtag}`,
      );
      let openCount = 0;
      for (const pds of pdsList) {
        await upsertServer(pds.url, {
          inviteCodeRequired: pds.inviteCodeRequired,
          version: pds.version,
          errorAt: pds.errorAt,
          isOpen: pds.isOpen,
        });
        if (pds.isOpen) openCount++;
      }
      console.log(`Synced to DB: ${openCount} open, ${pdsList.length} total`);
    } else {
      console.log(
        `state.json: 304 not modified, skipping upsert (etag=${stateEtag})`,
      );
    }

    // Persist the ETag only after the sync above succeeded. Storing it first
    // would make the next run receive a 304 and skip rows that were never
    // written if an upsert failed midway.
    if (stateEtag) {
      await setMetadata("state_json_etag", stateEtag);
    }

    // 2. Fetch latest versions for all tracked PDS software
    const latestVersions: LatestVersionMap = {};
    const versionToSoftware: VersionSoftwareMap = {};
    const trackedSoftware = getTrackedSoftware();

    for (const software of trackedSoftware) {
      const etagKey = `github_tags_etag:${software.id}`;
      const previousEtag = await getMetadata(etagKey);
      const result = await fetchRecentVersions(software, previousEtag);

      if (result.notModified || (!result.latest && result.error)) {
        // Both "not modified" and "API error" fall back to the versions
        // persisted by an earlier successful fetch.
        console.log(
          result.notModified
            ? `GitHub tags [${software.id}]: 304 not modified`
            : `GitHub tags [${software.id}]: API error (using cached)`,
        );
        const cached = await readCachedVersions(software.id);
        if (cached.latest) {
          latestVersions[software.id] = cached.latest;
        }
        for (const v of cached.versions) {
          versionToSoftware[v] = software.id;
        }
      } else if (result.latest) {
        latestVersions[software.id] = result.latest;
        await setMetadata(`latest_version:${software.id}`, result.latest);
        await setMetadata(
          `version_map:${software.id}`,
          JSON.stringify(result.versions),
        );
        for (const v of result.versions) {
          versionToSoftware[v] = software.id;
        }
        console.log(
          `GitHub tags [${software.id}]: latest ${result.latest} (${result.versions.length} versions)`,
        );
      } else {
        console.log(`GitHub tags [${software.id}]: no versions found`);
      }

      // Store the ETag last, so a failed write of the version data above
      // cannot leave a fresh ETag pointing at a missing cache.
      if (result.etag) {
        await setMetadata(etagKey, result.etag);
      }
    }

    // Backward compat: keep latest_pds_version for bluesky-pds
    if (latestVersions["bluesky-pds"]) {
      await setMetadata("latest_pds_version", latestVersions["bluesky-pds"]);
    }

    // Also maintain legacy github_tags_etag for backward compat
    const bskyEtag = await getMetadata("github_tags_etag:bluesky-pds");
    if (bskyEtag) {
      await setMetadata("github_tags_etag", bskyEtag);
    }

    console.log(
      `Version map: ${
        Object.keys(versionToSoftware).length
      } known versions across ${trackedSoftware.length} software`,
    );

    // 3. Enrich a batch of servers
    const toEnrich = await getServersToEnrich(ENRICHMENT_BATCH_SIZE);
    if (toEnrich.length > 0) {
      const cachedIpCount = toEnrich.filter((s) => s.ipAddress).length;
      const cachedGeoCount = toEnrich.filter((s) => s.countryCode).length;
      console.log(
        `Enriching ${toEnrich.length} servers (${cachedIpCount} have cached IP, ${cachedGeoCount} have cached geo)`,
      );

      const { results: enriched, stats } = await enrichBatch(toEnrich);

      for (const data of enriched) {
        // Detect software by matching version against known version map.
        // When nothing matches, whatever enrichBatch put in pdsSoftware is
        // left untouched.
        if (data.version) {
          const detected = detectSoftware(data.version, versionToSoftware);
          if (detected) {
            data.pdsSoftware = detected;
          }
        }

        await updateEnrichment(data.url, {
          version: data.version,
          did: data.did,
          phoneVerification: data.phoneVerification,
          userDomains: data.userDomains,
          contactEmail: data.contactEmail,
          privacyPolicy: data.privacyPolicy,
          termsOfService: data.termsOfService,
          userCount: data.userCount,
          countryCode: data.countryCode,
          countryName: data.countryName,
          ipAddress: data.ipAddress,
          pdsSoftware: data.pdsSoftware,
        });
      }

      const noUserCount = enriched.filter((s) => s.userCount === null).length;
      const softwareCounts: Record<string, number> = {};
      for (const s of enriched) {
        const sw = s.pdsSoftware ?? "unknown";
        softwareCounts[sw] = (softwareCounts[sw] ?? 0) + 1;
      }
      console.log(
        `Enriched ${enriched.length} servers: ` +
          `DNS ${stats.cachedDns} cached/${stats.freshDns} fresh, ` +
          `geo ${stats.cachedGeo} cached/${stats.freshGeo} fresh` +
          (noUserCount > 0 ? `, ${noUserCount} without user count` : "") +
          `, software: ${JSON.stringify(softwareCounts)}`,
      );
    }

    await setMetadata("last_full_refresh", new Date().toISOString());
    console.log("OpenPDS refresh completed");
  } catch (err) {
    // Failures are logged rather than rethrown; `last_full_refresh` is not
    // updated in that case because it is written as the final step above.
    console.error("OpenPDS refresh failed:", err);
  }
}