tangled
alpha
login
or
join now
essem.space
/
pds-feedgen
2
fork
atom
A couple of Bluesky feeds focused around PDSes
2
fork
atom
overview
issues
pulls
pipelines
Move db tasks to worker and run post removal occasionally
essem.space
4 months ago
d9203d3b
5b29c04a
+146
-99
2 changed files
expand all
collapse all
unified
split
ingest
worker.ts
ingest.ts
+34
-99
ingest.ts
···
1
1
-
import { AppBskyFeedPost } from "@atcute/bluesky";
2
1
import { Client, ok, simpleFetchHandler } from "@atcute/client";
3
2
import {
4
3
CompositeDidDocumentResolver,
···
6
5
WebDidDocumentResolver,
7
6
} from "@atcute/identity-resolver";
8
7
import type { ResourceUri } from "@atcute/lexicons/syntax";
9
9
-
import { createClient } from "redis";
10
8
import { Jetstream, CommitType } from "@skyware/jetstream";
11
9
12
10
import type {} from "@atcute/atproto";
13
11
14
12
import { db } from "./common/db.ts";
15
15
-
import { Post, type Author, type DID } from "./common/types.ts";
16
16
-
17
17
-
type ShallowPost = Omit<Post, "cid" | "indexed_at" | "author">;
18
18
-
19
19
-
const postQueue: Post[] = [];
20
20
-
const delQueue: ShallowPost[] = [];
21
21
-
22
22
-
const redis = createClient();
23
23
-
redis.on("error", (err) => console.log("Redis Client Error", err));
24
24
-
await redis.connect();
25
25
-
26
26
-
const insertPost = db.prepare(
27
27
-
`INSERT INTO posts ("uri", "cid", "author", "indexed_at") VALUES (?1, ?2, ?3, ?4) ON CONFLICT DO NOTHING;`
28
28
-
);
29
29
-
30
30
-
const insertPosts = db.transaction((posts: Post[]) => {
31
31
-
for (const post of posts) {
32
32
-
const changes = insertPost.run(
33
33
-
post.uri,
34
34
-
post.cid,
35
35
-
post.author,
36
36
-
post.indexed_at
37
37
-
);
38
38
-
if (changes > 0) {
39
39
-
const pdsKey = `posts:${post.pds}`;
40
40
-
redis
41
41
-
.lPush(pdsKey, `${post.uri};${post.indexed_at}`)
42
42
-
.then((length) => {
43
43
-
if (length > 30000) {
44
44
-
redis.lTrim(pdsKey, 0, 29999);
45
45
-
return redis.rPop(pdsKey);
46
46
-
}
47
47
-
})
48
48
-
.then((last) => {
49
49
-
if (last) {
50
50
-
const indexTime = last.split(";")[1];
51
51
-
if (indexTime?.trim()) {
52
52
-
removePostByPDS.run(post.pds, indexTime);
53
53
-
}
54
54
-
}
55
55
-
});
56
56
-
}
57
57
-
}
58
58
-
});
59
59
-
60
60
-
const removePostByURL = db.prepare(
61
61
-
`DELETE FROM posts WHERE uri = ?1 RETURNING indexed_at, author;`
62
62
-
);
63
63
-
const removePostByPDS = db.prepare(
64
64
-
`DELETE FROM posts WHERE rowid IN (SELECT a.rowid FROM posts a INNER JOIN authors b ON a.author = b.did WHERE b.pds = ?1 AND a.indexed_at < ?2);`
65
65
-
);
66
66
-
67
67
-
const removePosts = db.transaction((posts: ShallowPost[]) => {
68
68
-
for (const post of posts) {
69
69
-
const dbResult = removePostByURL.get<Omit<Post, "uri" | "cid" | "pds">>(
70
70
-
post.uri
71
71
-
);
72
72
-
if (dbResult) {
73
73
-
redis.lRem(`posts:${post.pds}`, 0, `${post.uri};${dbResult.indexed_at}`);
74
74
-
}
75
75
-
}
76
76
-
});
77
77
-
78
78
-
const getAuthor = db.prepare("SELECT pds FROM authors WHERE did = ?");
79
79
-
const upsertAuthor = db.prepare(
80
80
-
"INSERT OR REPLACE INTO authors (did, pds, pds_base) VALUES (?1, ?2, ?3)"
81
81
-
);
82
82
-
83
83
-
const getCursor = db.prepare("SELECT cursor FROM state WHERE id = 1");
84
84
-
const updateCursor = db.prepare("UPDATE state SET cursor = ? WHERE id = 1");
13
13
+
import type { Author, DID } from "./common/types.ts";
85
14
86
15
const didResolver = new CompositeDidDocumentResolver({
87
16
methods: {
···
90
19
},
91
20
});
92
21
22
22
+
const worker = new Worker(new URL("./ingest/worker.ts", import.meta.url).href, {
23
23
+
type: "module",
24
24
+
});
25
25
+
26
26
+
const getAuthor = db.prepare("SELECT pds FROM authors WHERE did = ?");
27
27
+
93
28
async function getPDS(did: DID, ignoreCache = false) {
94
29
let pds: string | undefined;
95
30
···
105
40
service.type == "AtprotoPersonalDataServer" &&
106
41
typeof service.serviceEndpoint === "string"
107
42
) {
108
108
-
upsertAuthor.run(
43
43
+
worker.postMessage({
44
44
+
op: 4,
109
45
did,
110
110
-
service.serviceEndpoint,
111
111
-
getPDSBase(service.serviceEndpoint)
112
112
-
);
46
46
+
pds: service.serviceEndpoint,
47
47
+
pds_base: getPDSBase(service.serviceEndpoint),
48
48
+
});
113
49
pds = service.serviceEndpoint;
114
50
}
115
51
}
···
126
62
}`;
127
63
}
128
64
65
65
+
const getCursor = db.prepare("SELECT cursor FROM state WHERE id = 1");
66
66
+
129
67
const dbCursor = getCursor.get<{ cursor?: string }>();
130
68
const cursor = dbCursor ? Number(dbCursor.cursor) : 0;
131
69
const jetstream = new Jetstream({
···
140
78
141
79
jetstream.on("error", (e, c) => {
142
80
console.error(e);
143
143
-
updateCursor.run(c);
81
81
+
worker.postMessage({
82
82
+
op: 3,
83
83
+
cursor: c,
84
84
+
});
144
85
});
145
86
146
87
let count = 0;
···
149
90
count++;
150
91
if (count >= 1024) {
151
92
count = 0;
152
152
-
updateCursor.run(e.time_us);
93
93
+
worker.postMessage({
94
94
+
op: 3,
95
95
+
cursor: e.time_us,
96
96
+
});
153
97
}
154
98
155
99
const atUri: ResourceUri = `at://${e.did}/app.bsky.feed.post/${e.commit.rkey}`;
···
167
111
}
168
112
169
113
if (e.commit.operation === CommitType.Create) {
170
170
-
const indexed_at = new Date().toISOString();
171
171
-
postQueue.push({
172
172
-
uri: atUri,
114
114
+
worker.postMessage({
115
115
+
op: 0,
116
116
+
atUri,
173
117
cid: e.commit.cid,
174
174
-
author: e.did,
175
175
-
indexed_at,
118
118
+
did: e.did,
176
119
pds,
177
120
});
178
178
-
if (postQueue.length > 127) {
179
179
-
insertPosts.immediate(postQueue.splice(0, 128));
180
180
-
}
181
121
} else if (e.commit.operation === CommitType.Delete) {
182
182
-
delQueue.push({
183
183
-
uri: atUri,
122
122
+
worker.postMessage({
123
123
+
op: 1,
124
124
+
atUri,
184
125
pds,
185
126
});
186
186
-
if (delQueue.length > 63) {
187
187
-
removePosts.immediate(delQueue.splice(0, 64));
188
188
-
}
189
127
}
190
128
});
191
129
···
204
142
},
205
143
})
206
144
);
207
207
-
const posts = records.map((v) => ({
208
208
-
uri: v.uri,
209
209
-
cid: v.cid,
210
210
-
author: e.did,
211
211
-
indexed_at:
212
212
-
(v.value as AppBskyFeedPost.Main).createdAt ?? new Date().toISOString(),
145
145
+
worker.postMessage({
146
146
+
op: 2,
147
147
+
records,
148
148
+
did: e.did,
213
149
pds,
214
214
-
}));
215
215
-
insertPosts.immediate(posts);
150
150
+
});
216
151
} catch (e) {
217
152
console.error(`Failed to backfill posts: ${e}`);
218
153
}
+112
ingest/worker.ts
···
1
1
+
import { createClient } from "redis";
2
2
+
3
3
+
import { db } from "../common/db.ts";
4
4
+
import type { Post } from "../common/types.ts";
5
5
+
6
6
+
type ShallowPost = Omit<Post, "cid" | "indexed_at" | "author">;
7
7
+
8
8
+
const redis = createClient();
9
9
+
redis.on("error", (err) => console.log("Redis Client Error", err));
10
10
+
await redis.connect();
11
11
+
12
12
+
// Prepared insert for one post row; ON CONFLICT DO NOTHING turns a duplicate into a no-op.
const insertPost = db.prepare(
  `INSERT INTO posts ("uri", "cid", "author", "indexed_at") VALUES (?1, ?2, ?3, ?4) ON CONFLICT DO NOTHING;`
);

/** Cap on the number of entries kept in each per-PDS Redis list. */
const MAX_POSTS_PER_PDS = 30000;

// PDS -> indexed_at of the entry most recently popped off that PDS's capped
// Redis list. Consumed by the periodic cleanup timer further down this file.
const lastPostTimes = new Map<string, string>();

/**
 * Pushes a freshly inserted post onto its PDS's Redis list and, when the list
 * has grown past the cap, trims it and remembers the indexed_at of the popped
 * tail entry in `lastPostTimes`.
 *
 * @param post The post that was just inserted into the `posts` table.
 * @returns A promise that rejects if any Redis command fails; callers must
 *   attach a rejection handler.
 */
async function trackPostInRedis(post: Post): Promise<void> {
  const pdsKey = `posts:${post.pds}`;
  const length = await redis.lPush(pdsKey, `${post.uri};${post.indexed_at}`);
  if (length <= MAX_POSTS_PER_PDS) return;

  // Both commands are issued back-to-back (trim first, then pop), exactly as
  // before; Promise.all additionally makes a failing lTrim observable instead
  // of leaving its promise floating.
  const [, last] = await Promise.all([
    redis.lTrim(pdsKey, 0, MAX_POSTS_PER_PDS - 1),
    redis.rPop(pdsKey),
  ]);
  if (!last) return;

  // List entries have the form "<uri>;<indexed_at>".
  const indexTime = last.split(";")[1];
  if (indexTime?.trim() && post.pds) {
    lastPostTimes.set(post.pds, indexTime);
  }
}

/**
 * Inserts a batch of posts inside one database transaction. For every row that
 * was actually inserted (i.e. not a duplicate) the post is also recorded in
 * Redis; that part is fire-and-forget and does not take part in the
 * transaction.
 */
const insertPosts = db.transaction((posts: Post[]) => {
  for (const post of posts) {
    const changes = insertPost.run(
      post.uri,
      post.cid,
      post.author,
      post.indexed_at
    );
    if (changes > 0) {
      // The transaction callback is synchronous, so the Redis work cannot be
      // awaited here; handle failures explicitly rather than letting them
      // surface as unhandled promise rejections.
      trackPostInRedis(post).catch((err) =>
        console.error(`Failed to record ${post.uri} in Redis:`, err)
      );
    }
  }
});
47
47
+
48
48
+
// Deletes one post by URI and hands back the columns needed to rebuild its Redis list entry.
const removePostByURL = db.prepare(
  `DELETE FROM posts WHERE uri = ?1 RETURNING indexed_at, author;`
);
// Deletes every post whose author lives on PDS ?1 and whose indexed_at is older than ?2.
const removePostByPDS = db.prepare(
  `DELETE FROM posts WHERE rowid IN (SELECT a.rowid FROM posts a INNER JOIN authors b ON a.author = b.did WHERE b.pds = ?1 AND a.indexed_at < ?2);`
);

/**
 * Deletes a batch of posts inside one database transaction. For each post that
 * actually existed, the matching "<uri>;<indexed_at>" entry is also removed
 * from that PDS's Redis list (fire-and-forget, outside the transaction).
 */
const removePosts = db.transaction((posts: ShallowPost[]) => {
  for (const post of posts) {
    const dbResult = removePostByURL.get<Omit<Post, "uri" | "cid" | "pds">>(
      post.uri
    );
    if (!dbResult) continue; // Unknown URI: nothing to clean up in Redis.

    redis
      .lRem(`posts:${post.pds}`, 0, `${post.uri};${dbResult.indexed_at}`)
      // The transaction callback is synchronous, so the promise cannot be
      // awaited; log failures instead of leaving an unhandled rejection.
      .catch((err) =>
        console.error(`Failed to remove ${post.uri} from Redis:`, err)
      );
  }
});
65
65
+
66
66
+
// Inserts or replaces the (did, pds, pds_base) row for an author.
const upsertAuthor = db.prepare(
  "INSERT OR REPLACE INTO authors (did, pds, pds_base) VALUES (?1, ?2, ?3)"
);

// Stores the latest stream cursor in the single state row (id = 1).
const updateCursor = db.prepare("UPDATE state SET cursor = ? WHERE id = 1");

// Once a minute, delete posts older than the cutoff recorded for each PDS in
// `lastPostTimes`.
setInterval(() => {
  for (const [pds, time] of lastPostTimes) {
    try {
      removePostByPDS.run(pds, time);
      // Forget the cutoff only once the delete has succeeded, so a failed run
      // is retried on the next tick. Deleting the current key while iterating
      // a Map is safe.
      lastPostTimes.delete(pds);
    } catch (err) {
      // Keep sweeping the remaining PDSes instead of letting one failure
      // escape the timer callback.
      console.error(`Failed to prune old posts for ${pds}:`, err);
    }
  }
}, 60000);
78
78
+
79
79
+
/** A backfilled record as forwarded by the ingest thread (only the fields read here). */
type BackfillRecord = {
  uri: Post["uri"];
  cid: Post["cid"];
  value: { createdAt?: string };
};

/**
 * Messages accepted by this worker, discriminated by `op`:
 * 0 = insert one post, 1 = delete one post, 2 = insert backfilled records,
 * 3 = persist the stream cursor, 4 = upsert an author row.
 */
type WorkerMessage =
  | { op: 0; atUri: Post["uri"]; cid: Post["cid"]; did: Post["author"]; pds: Post["pds"] }
  | { op: 1; atUri: Post["uri"]; pds: Post["pds"] }
  | { op: 2; records: BackfillRecord[]; did: Post["author"]; pds: Post["pds"] }
  | { op: 3; cursor: number }
  | { op: 4; did: string; pds: string; pds_base: string };

/**
 * Dispatches a message from the ingest thread to the matching database task.
 * Messages with an unrecognised `op` are ignored.
 */
self.onmessage = (e: MessageEvent<WorkerMessage>) => {
  const msg = e.data;
  switch (msg.op) {
    case 0: {
      // Live post creation: stamp it with the time this worker saw it.
      const indexed_at = new Date().toISOString();
      insertPosts.immediate([
        {
          uri: msg.atUri,
          cid: msg.cid,
          author: msg.did,
          indexed_at,
          pds: msg.pds,
        },
      ]);
      break;
    }
    case 1:
      removePosts.immediate([
        {
          uri: msg.atUri,
          pds: msg.pds,
        },
      ]);
      break;
    case 2: {
      // Backfill: prefer the record's own createdAt, falling back to "now".
      const posts = msg.records.map((record) => ({
        uri: record.uri,
        cid: record.cid,
        author: msg.did,
        indexed_at: record.value.createdAt ?? new Date().toISOString(),
        pds: msg.pds,
      }));
      insertPosts.immediate(posts);
      break;
    }
    case 3:
      updateCursor.run(msg.cursor);
      break;
    case 4:
      upsertAuthor.run(msg.did, msg.pds, msg.pds_base);
      break;
    default:
      // Unknown op codes are ignored.
      break;
  }
};