tangled
alpha
login
or
join now
edavis.dev
/
bsky-tools
0
fork
atom
this repo has no description
0
fork
atom
overview
issues
pulls
pipelines
feat(mostliked): only feedweb stuff now
Eric Davis
1 year ago
45c4877b
bc51826d
-141
2 changed files
expand all
collapse all
unified
split
feed_manager.py
feeds
mostliked.py
-5
feed_manager.py
···
53
53
pass
54
54
55
55
feed_manager = FeedManager()
56
56
-
feed_manager.register(RapidFireFeed)
57
56
feed_manager.register(PopularFeed)
58
58
-
feed_manager.register(HomeRunsTeamFeed)
59
59
-
feed_manager.register(NoraZoneInteresting)
60
60
-
feed_manager.register(SevenDirtyWordsFeed)
61
57
feed_manager.register(MostLikedFeed)
62
62
-
# feed_manager.register(PopularQuotePostsFeed)
-136
feeds/mostliked.py
···
2
2
3
3
import apsw
4
4
import apsw.ext
5
5
-
from expiringdict import ExpiringDict
6
6
-
import threading
7
7
-
import queue
8
5
9
6
from . import BaseFeed
10
7
11
11
-
# store post in database once it has this many likes
12
12
-
MIN_LIKES = 5
13
13
-
14
14
-
class DatabaseWorker(threading.Thread):
15
15
-
def __init__(self, name, db_path, task_queue):
16
16
-
super().__init__()
17
17
-
self.db_cnx = apsw.Connection(db_path)
18
18
-
self.db_cnx.pragma('foreign_keys', True)
19
19
-
self.db_cnx.pragma('journal_mode', 'WAL')
20
20
-
self.db_cnx.pragma('wal_autocheckpoint', '0')
21
21
-
self.stop_signal = False
22
22
-
self.task_queue = task_queue
23
23
-
self.logger = logging.getLogger(f'feeds.db.{name}')
24
24
-
self.changes = 0
25
25
-
26
26
-
def run(self):
27
27
-
while True:
28
28
-
task = self.task_queue.get(block=True)
29
29
-
if task == 'STOP':
30
30
-
self.logger.debug('received STOP, breaking now')
31
31
-
break
32
32
-
elif task == 'COMMIT':
33
33
-
self.logger.debug(f'committing {self.changes} changes')
34
34
-
if self.db_cnx.in_transaction:
35
35
-
self.db_cnx.execute('COMMIT')
36
36
-
checkpoint = self.db_cnx.execute('PRAGMA wal_checkpoint(PASSIVE)')
37
37
-
self.logger.debug(f'checkpoint: {checkpoint.fetchall()!r}')
38
38
-
self.changes = 0
39
39
-
self.logger.debug(f'qsize: {self.task_queue.qsize()}')
40
40
-
else:
41
41
-
sql, bindings = task
42
42
-
if not self.db_cnx.in_transaction:
43
43
-
self.db_cnx.execute('BEGIN')
44
44
-
self.db_cnx.execute(sql, bindings)
45
45
-
self.changes += self.db_cnx.changes()
46
46
-
self.task_queue.task_done()
47
47
-
48
48
-
self.logger.debug('closing database connection')
49
49
-
self.db_cnx.close()
50
50
-
51
51
-
def stop(self):
52
52
-
self.task_queue.put('STOP')
53
53
-
54
8
class MostLikedFeed(BaseFeed):
55
9
FEED_URI = 'at://did:plc:4nsduwlpivpuur4mqkbfvm6a/app.bsky.feed.generator/most-liked'
56
56
-
DELETE_OLD_POSTS_QUERY = """
57
57
-
delete from posts where create_ts < unixepoch('now', '-24 hours');
58
58
-
"""
59
10
60
11
def __init__(self):
61
12
self.db_cnx = apsw.Connection('db/mostliked.db')
62
13
self.db_cnx.pragma('foreign_keys', True)
63
14
self.db_cnx.pragma('journal_mode', 'WAL')
64
64
-
self.db_cnx.pragma('wal_autocheckpoint', '0')
65
65
-
66
66
-
with self.db_cnx:
67
67
-
self.db_cnx.execute("""
68
68
-
create table if not exists posts (
69
69
-
uri text primary key,
70
70
-
create_ts timestamp,
71
71
-
likes int
72
72
-
);
73
73
-
create table if not exists langs (
74
74
-
uri text,
75
75
-
lang text,
76
76
-
foreign key(uri) references posts(uri) on delete cascade
77
77
-
);
78
78
-
create index if not exists ts_idx on posts(create_ts);
79
79
-
""")
80
80
-
81
81
-
self.logger = logging.getLogger('feeds.mostliked')
82
82
-
self.drafts = ExpiringDict(max_len=50_000, max_age_seconds=5*60)
83
83
-
84
84
-
self.db_writes = queue.Queue()
85
85
-
self.db_worker = DatabaseWorker('mostliked', 'db/mostliked.db', self.db_writes)
86
86
-
self.db_worker.start()
87
87
-
88
88
-
def stop_db_worker(self):
89
89
-
self.logger.debug('sending STOP')
90
90
-
self.db_writes.put('STOP')
91
91
-
92
92
-
def process_commit(self, commit):
93
93
-
if commit['opType'] != 'c':
94
94
-
return
95
95
-
96
96
-
if commit['collection'] == 'app.bsky.feed.post':
97
97
-
record = commit.get('record')
98
98
-
post_uri = f"at://{commit['did']}/app.bsky.feed.post/{commit['rkey']}"
99
99
-
100
100
-
# to keep the DB in check, instead of adding every post right away
101
101
-
# we make note of it but only add to DB once it gets some likes
102
102
-
self.drafts[post_uri] = {
103
103
-
'ts': self.safe_timestamp(record.get('createdAt')).timestamp(),
104
104
-
'langs': record.get('langs', []),
105
105
-
'likes': 0,
106
106
-
}
107
107
-
108
108
-
elif commit['collection'] == 'app.bsky.feed.like':
109
109
-
record = commit.get('record')
110
110
-
try:
111
111
-
subject_uri = record['subject']['uri']
112
112
-
except KeyError:
113
113
-
return
114
114
-
115
115
-
if subject_uri in self.drafts:
116
116
-
record_info = self.drafts.pop(subject_uri).copy()
117
117
-
record_info['likes'] += 1
118
118
-
if record_info['likes'] < MIN_LIKES:
119
119
-
self.drafts[subject_uri] = record_info
120
120
-
return
121
121
-
122
122
-
self.logger.debug(f'graduating {subject_uri}')
123
123
-
124
124
-
task = (
125
125
-
'insert or ignore into posts (uri, create_ts, likes) values (:uri, :ts, :likes)',
126
126
-
{'uri': subject_uri, 'ts': record_info['ts'], 'likes': record_info['likes']}
127
127
-
)
128
128
-
self.db_writes.put(task)
129
129
-
130
130
-
for lang in record_info['langs']:
131
131
-
task = (
132
132
-
'insert or ignore into langs (uri, lang) values (:uri, :lang)',
133
133
-
{'uri': subject_uri, 'lang': lang}
134
134
-
)
135
135
-
self.db_writes.put(task)
136
136
-
137
137
-
subject_exists = self.db_cnx.execute('select 1 from posts where uri = ?', [subject_uri])
138
138
-
if subject_exists.fetchone() is None:
139
139
-
return
140
140
-
141
141
-
task = (
142
142
-
'update posts set likes = likes + 1 where uri = :uri',
143
143
-
{'uri': subject_uri}
144
144
-
)
145
145
-
self.db_writes.put(task)
146
146
-
147
147
-
def commit_changes(self):
148
148
-
self.db_writes.put((self.DELETE_OLD_POSTS_QUERY, {}))
149
149
-
self.db_writes.put('COMMIT')
150
150
-
self.logger.debug(f'there are {len(self.drafts)} drafts')
151
15
152
16
def generate_sql(self, limit, offset, langs):
153
17
bindings = []